From ef3b7a6558ec57b2e4c2023dae2841082835ad85 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 22:34:52 +0900
Subject: [PATCH 1/6] Generate DBs of all versions

---
 jendeley-backend/.npmignore                   |  1 +
 .../scripts/generated_DBs_for_all_versions.sh | 79 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100755 jendeley-backend/scripts/generated_DBs_for_all_versions.sh

diff --git a/jendeley-backend/.npmignore b/jendeley-backend/.npmignore
index 7d71e45..daeb82b 100644
--- a/jendeley-backend/.npmignore
+++ b/jendeley-backend/.npmignore
@@ -18,3 +18,4 @@ benchmark.svg
 perf.data*
 processed-isolate-*.txt
 processed-isolate-*.txt
+generated_DBs
diff --git a/jendeley-backend/scripts/generated_DBs_for_all_versions.sh b/jendeley-backend/scripts/generated_DBs_for_all_versions.sh
new file mode 100755
index 0000000..55c95ea
--- /dev/null
+++ b/jendeley-backend/scripts/generated_DBs_for_all_versions.sh
@@ -0,0 +1,79 @@
+#!/bin/bash -eux
+
+rootdir=$(git rev-parse --show-toplevel)
+versions=(
+  # TODO: Do not skip pre 1.0.0 versions
+  # "0.0.1"
+  # "0.0.2"
+  # "0.0.3"
+  # "0.0.4"
+  # "0.0.5"
+  # "0.0.6"
+  # "0.0.7"
+  # "0.0.8"
+  # "0.0.9"
+  # "0.0.10"
+  # "0.0.11"
+  # "0.0.12"
+  # "0.0.13"
+  # "0.0.14"
+  # "0.0.15"
+  # "0.0.16"
+  # "0.0.17"
+  # "0.0.18"
+  # "0.0.19"
+  # "0.0.20"
+  # "0.0.21"
+  # "0.0.22"
+  # "0.0.23"
+  # "0.0.24"
+  # "0.0.25"
+  # "0.0.26"
+  # "0.0.27"
+  "1.0.0"
+  "1.0.1"
+  "1.0.2"
+  "1.0.3"
+  "1.0.4"
+  "1.0.5"
+  "1.0.6"
+  "1.0.7"
+  "1.0.8"
+  "1.0.9"
+  "1.0.10"
+  "1.1.0"
+  "1.1.1"
+  "1.2.0"
+  "1.3.0"
+  "2.0.2"
+  "2.0.4"
+  "2.0.5"
+  "2.0.6"
+  "2.0.7"
+  "2.0.9"
+  "2.0.10"
+  "2.0.12"
+  "2.1.1"
+  "2.1.2"
+  "2.1.3"
+  "2.2.0"
+)
+
+mkdir -p ${rootdir}/jendeley-backend/generated_DBs
+process_list=()
+for version in "${versions[@]}"; do
+    echo "Generating DB for version $version"
+    workdir=$(mktemp -d -t gen_DBs-for-all-versions-${version}-XXXXXXXXXX)
+    cp -r ${rootdir}/jendeley-backend/test_pdfs ${workdir}/test_pdfs
+    rm -rf ${workdir}/test_pdfs/jendeley_db.json
+    echo "#! /bin/bash -eux" > ${workdir}/gen_DB.sh
+    echo "npm install -g @a_kawashiro/jendeley@${version}" >> ${workdir}/gen_DB.sh
+    echo "jendeley scan --papers_dir /workdir/test_pdfs --book_dirs /workdir/test_pdfs/dummyTapl" >> ${workdir}/gen_DB.sh
+    docker run --volume /${workdir}:/workdir --rm node:21 bash /workdir/gen_DB.sh && cp ${workdir}/test_pdfs/jendeley_db.json ${rootdir}/jendeley-backend/generated_DBs/jendeley_db_${version}.json &
+    process_id=$!
+    process_list+=(${process_id})
+done
+
+for p in "${process_list[@]}"; do
+    wait ${p}
+done

From 72eb6faf4a94fe7c24b0368529dd49bf8d2c6af4 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 22:35:09 +0900
Subject: [PATCH 2/6] Add all generated versions

---
 .../generated_DBs/jendeley_db_1.0.0.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.1.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.10.json     | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.2.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.3.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.4.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.5.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.6.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.7.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.8.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.0.9.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.1.0.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.1.1.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.2.0.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_1.3.0.json      | 1555 ++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.10.json     | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.12.json     | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.2.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.4.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.5.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.6.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.7.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.0.9.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.1.1.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.1.2.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.1.3.json      | 1567 +++++++++++++++++
 .../generated_DBs/jendeley_db_2.2.0.json      | 1567 +++++++++++++++++
 27 files changed, 42129 insertions(+)
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.0.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.1.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.10.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.2.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.3.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.4.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.5.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.6.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.7.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.8.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.0.9.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.1.0.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.1.1.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.2.0.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_1.3.0.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.10.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.12.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.2.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.4.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.5.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.6.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.7.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.0.9.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.1.1.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.1.2.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.1.3.json
 create mode 100644 jendeley-backend/generated_DBs/jendeley_db_2.2.0.json

diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json
new file mode 100644
index 0000000..5a963b8
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.0"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json
new file mode 100644
index 0000000..6fa1618
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.1"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json
new file mode 100644
index 0000000..8506e50
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.10"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json
new file mode 100644
index 0000000..4c95503
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.2"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json
new file mode 100644
index 0000000..56f05bc
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.3"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json
new file mode 100644
index 0000000..9f39c1a
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.4"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json
new file mode 100644
index 0000000..fc01bf3
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.5"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json
new file mode 100644
index 0000000..6fc0693
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.6"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json
new file mode 100644
index 0000000..e5bbeb4
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.7"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json
new file mode 100644
index 0000000..18ca0be
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.8"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json
new file mode 100644
index 0000000..0255229
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.0.9"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json
new file mode 100644
index 0000000..53cc794
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.1.0"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json b/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json
new file mode 100644
index 0000000..7810285
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.1.1"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json
new file mode 100644
index 0000000..72534a6
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.2.0"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json
new file mode 100644
index 0000000..130fce7
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json
@@ -0,0 +1,1555 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "1.3.0"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "comments": ""
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json
new file mode 100644
index 0000000..0b6329c
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.10"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that calls create_deque\nseveral times?  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalled a function contract.  Generally, we like a function’s contract to tell us what is the weakest pre-condition,\ni.e.  set of requirements, for this function which, if it holds, no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states such that, if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongest postcondition the function ensures is.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions’  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contract’s  requires  clause  and  check  the  validity  of  its  contract’s  ensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use it in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and, using an example, we present\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety taking unsafe Rust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  exactly  the  Rust  compiler  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s type semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for  λ_Rust  by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stack G is\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ_1×τ_2) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variable x lives in a given region ρ; formally\nx_ρ makes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection, rnew, and ex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region with region⟨ρ⟩ x_ρ s binds x_ρ to region(ρ).\nRather than model individual memory locations,  paths\nprovide a symbolic way to refer to a component of a com-\npound object.  For instance, if the location x_ρ contains the\nvalue ((3,4),(5,6)), then the path x_ρ.1 refers to (3,4), and\nx_ρ.1.2 refers to 4. As in C, if p is a path, then &p is a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ; γ; ε; τ ⊢_stmt s\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγ records partial-order constraints relating region lifetimes,\nε records the capability (i.e., which regions in ∆ are con-\nsidered live), and τ records the type that e must have in\nany statement of the form return e. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ; γ; ε ⊢ e : τ\n――――――――――――――――――――\n∆; Γ; γ; ε; τ ⊢_stmt return e\nExpressions must access only memory that can be proven\nlive from ε and γ. Here are two example rules:\nγ ⊢ ε ⇒ ρ\n――――――――――――――――――――\n∆; Γ; γ; ε ⊢ x_ρ : Γ(x_ρ)\n∆; Γ; γ; ε ⊢ e : τ∗ρ    γ ⊢ ε ⇒ ρ\n――――――――――――――――――――\n∆; Γ; γ; ε ⊢ ∗e : τ\nWe use γ ⊢ ε ⇒ ρ to prove ρ is live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  The diffs column\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, the total column is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences of region r\n{s} and rnew. The lines column is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings is changed\nfrom char * to char ?.   We are currently experimenting\nwith  interpreting char * as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required adding ρ_H annotations to\nargv so that arguments could be stored in global variables.\nThe program that required the most changes was grobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstruct type. We therefore parameterized the struct defini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplication f [ρ′₁, ..., ρ′ₖ] at ρ′ extracts this region function closure from the store,\napplies it to actual arguments ρ′₁, ..., ρ′ₖ, and creates a function closure in ρ′.\nFor any finite set {ρ₁, ..., ρₖ} of region variables (k ≥ 0), we write letregion\nρ₁, ..., ρₖ in e end for letregion ρ₁ in ··· letregion ρₖ in e end ··· end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName = {r1, r2, ...} of region names; we\nuse r to range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerably infinite set, OffSet, of offsets; we use o to\nrange over offsets.\nA region is a finite map from offsets to storable values. A storable value is either\nan integer constant, a function closure, or a region function closure. We use sv to\nrange over storable values; the set of storable values is denoted StoreVal. A variable\nenvironment is a finite map from program variables to values. We use VE to range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nA region environment is a finite map from region variables to region names. We use\nR to range over region environments; the set of region environments is denoted\nRegEnv. A function closure is a quadruple ⟨x, e′, VE, R⟩, where x is a program\nvariable, e′ is a target language expression, and VE and R give meaning to the\nfree program and region variables of λx.e′. A region function closure is a tuple\nof the form ⟨ρ₁ ··· ρₖ, x, e, VE, R⟩. 
Region function closures represent region-\npolymorphic functions; the region variables ρ₁, ..., ρₖ are required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r, o) of a region name and an offset. We use a to range over\naddresses and Addr to denote the set of addresses. For any address a, we write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom region names to regions. We use s to range over stores; the set of stores is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a = (r, o) is an address, we\nwrite s(a) to mean s(r)(o). Similarly, we write s + {(r, o) ↦ sv} as a shorthand for\ns + {r ↦ (s(r) + {o ↦ sv})}. Moreover, we define the planar domain of s, written\nPdom(s), to be the finite set {(r, o) ∈ Addr | r ∈ Dom(s) ∧ o ∈ Dom(s(r))}. Finally,\nwe write ``s ∖∖ {r}'' (read: s without r) to mean the store s ↓ (Dom(s) ∖ {r}).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the form s, VE, R ⊢ e′ → v′, s′, read: In store s,\nvariable environment VE, and region environment R, the target expression e′ evaluates\nto value v′ and (a perhaps modified) store s′.\nRule 10 is the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because ρ′ᵢ ∉ Dom(R),\nfor some ρ′ᵢ). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. 
For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwith letrec need not be recursive, so one can also use the letrec construct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditions r = R(ρ), o ∉ Dom(s(r))); the\nenvironment VE′ = VE + {f ↦ (r, o)} is stored in the region function closure\n⟨ρ₁, ..., ρₖ, x, e₁, VE′, R⟩, which in turn is stored in the fresh address chosen\nearlier. Any reference to f in e₁ will then yield the region function closure itself, by\nRule 10, as desired (since letrec introduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure ⟨x, e, VE₀, R₀⟩, even if the operator expression is of the\nform f [ρ′₁, ..., ρ′ₖ] at ρ. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x) = if x = 0 then 1\nelse if x = 1 then 1\nelse fib(x - 2) + fib(x - 1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function takes two arguments, namely ρ₃, which is the region where x is\nlocated, and ρ₄, which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system, the recursive\ncalls of fib use regions different from ρ₃ and ρ₄ (and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (ρ₅), then reserves space for the actual argument (ρ₈), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nThe letrec stores the following cyclic region function closure in the store at\nsome new address, a:\n⟨ρ₃ρ₄, x, if ..., {fib ↦ a}, {ρ₁ ↦ r₁, ρ₂ ↦ r₂}⟩\nAssuming that ρ₁₃ is bound to r₃, the application of fib to 15 near the end of the\nprogram stores the following function closure in the region denoted by ρ₁₂:\n⟨x, if ..., {fib ↦ a}, {ρ₁ ↦ r₁, ρ₂ ↦ r₂, ρ₃ ↦ r₃, ρ₄ ↦ r₁}⟩\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG. 2. The Fibonacci function annotated with regions. The result will be a single integer in ρ₁.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. 
Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for λy.(#1 x, y) at ρ₁ so that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding for x is not destroyed when we leave the scope of x and ρ₆ and hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). 
Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite maps f₁ and f₂, we say that f₂ extends\nf₁, written f₁ ⊆ f₂, if Dom(f₁) ⊆ Dom(f₂) and for all x ∈ Dom(f₁), f₁(x) = f₂(x). We\nthen say that s₂ succeeds s₁, written s₂ ⊒ s₁ (or s₁ ⊑ s₂), if Dom(s₁) ⊆ Dom(s₂) and\ns₁(r) ⊆ s₂(r), for all r ∈ Dom(s₁).\nLemma 4.1. If s, VE, R ⊢ e → v, s′ then Dom(s) = Dom(s′) and s ⊑ s′.\nThe proof is a straightforward induction on the depth of inference of s, VE,\nR ⊢ e → v, s′. The formula Dom(s) = Dom(s′) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation of e may write values\nin existing regions, so it is possible to have s(r) ⊂ s′(r), for some r. However, e never\nremoves or overwrites any of the values that are in s.\n4.6. Syntactic Equality of Expressions\nLet e′ be a target expression. The set of program variables that occur free in e′\nis written fpv(e′). The set of region variables that occur free in e′ is frv(e′).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not defined C yet.) Next,\nconsider  the  point  where  the  actual  argument  4  to f has  been  stored,  the  closure\nfor f has  been  fetched  and  we  are  just  about  to  evaluate  the  body  of f.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  rules Rule 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by Rule 34.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 37. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by Rule 36.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 38.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by induction on the depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the  syntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\n$C(R, TE^{+}, E^{+}, s^{+}, VE')$ w.r.t. $\\varphi_2 \\cup \\varphi'$. (238)\nAlso,  by  (118)  and  (119),  we get\n$R$ connects $\\varphi_2 \\cup \\varphi'$ to $s^{+}$ (239)\n$R'$ and $R$ agree on $\\varphi_2 \\cup \\varphi'$. (240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exist $s'$ and $v'$\nsuch that\n$s^{+}, VE', R' \\vdash e'_2 \\rightarrow v', s'$ (241)\n$C(R', \\mu, v, s', v')$ w.r.t. $\\varphi'$. (242)\nFrom (241)  and  Rule 14 we get\n$s, VE, R' \\vdash \\mathtt{letrec}\\ f[\\rho_1, \\ldots, \\rho_k](x)\\ \\mathtt{at}\\ p = e'_1\\ \\mathtt{in}\\ e'_2\\ \\mathtt{end} \\rightarrow v', s'$. (243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term, $e_0$,  say.  Then\nregion inference  is  done  in  two  phases.  First $e_0$ is  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  of  the  fully  region  annotated  target  expression.  This  phase  is\ncalled spreading.  During  spreading,  every  recursive  function $f$ of type  scheme $\\sigma_{ML}$,\nsay,  is  given  the  most  general  type  scheme $\\sigma_0$ which  has $\\sigma_{ML}$ as  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  a letrec-bound int $\\rightarrow$ int function  will\nbe  given  type  scheme $\\forall \\rho_1 \\rho_2 \\epsilon . (\\mathtt{int}, \\rho_1) \\xrightarrow{\\epsilon.\\emptyset} (\\mathtt{int}, \\rho_2)$.  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of $\\mu'$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive  function  in $e_1$,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  for all derivations $TE \\vdash e \\Rightarrow e' : \\mu, \\varphi$,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitives ref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon . (\\alpha, \\rho_1) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2)\\}} ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2)$\n! : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon . ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{get}(\\rho_2)\\}} (\\alpha, \\rho_1)$\n:= : $\\forall \\alpha \\rho_1 \\rho_2 \\rho_3 \\rho_4 \\epsilon . (((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) * (\\alpha, \\rho_1), \\rho_3) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2), \\mathbf{put}(\\rho_4)\\}} (\\mathtt{unit}, \\rho_4)$.\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of $\\rho_1$ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth-Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML/NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5. Region profile for Knuth-Bendix before optimisations. One region ($\\rho_{122}$) of unbounded size,\nindicated  as r122inf in  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6. Region profile  for Knuth-Bendix after optimizations.\n169\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7. Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unix time command.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwith top,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth-Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth-Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacing let $x = e_1$ in $e_2$ end by $e_2[e_1/x]$ in\ncases  where $e_1$ is  a  syntactic  value  and  either $x$ occurs  at  most  once  in $e_2$ or  the\nvalue denoted  by $e_1$ is not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  the string2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  in string2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n$\\forall \\vec{\\rho} \\vec{\\alpha} \\vec{\\epsilon} . \\mu_1 \\xrightarrow{\\epsilon.\\varphi} \\mu_2$\nand $\\varphi$ contains  an  atomic  effect  of  the  form put($\\rho$),  where $\\rho$ is  not  amongst  the\nbound region variables $\\vec{\\rho}$, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If put($\\rho$) $\\in \\varphi$ and get($\\rho$) $\\notin \\varphi$ then what-\never  value  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u2014combined\nwith the profiling tools mentioned earlier\u2014to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE-ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwith V. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases, letregion and\nend translate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmalloc and free operations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown the soundness\nof the type system in terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs are left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike §1, preferring equalities to pattern matching.\nExample 3. Consider the following program, a variant of just_rec in §1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() { drop ma; mb } else { drop mb; ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 { drop ma; return true; }\nlet mut b = rand(); let old_b = b; intro 'b; let mb = &'b mut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb)); now 'b;\nr2 && old_b >= b\n}\nUnlike just_rec, the function linger_dec can modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  to linger_dec can\nintroduce a new lifetime 'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify that linger_dec never returns false. If we use,\nlike JustRec⁺ in §1.1, a predicate taking the memory states h, h′ and the stack\npointer sp, we have to discover the quantified invariant: ∀i ≤ sp. h[i] ≥ h′[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n2526\nImportantly, at the very time we take a mutable reference 〈a, a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including,  in  particular,\nrecursion and recursive types.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences; one interesting program among them is explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\ninto  CHCs  in  the  SMT-LIB 2  format  using  RustHorn  and  SeaHorn  (version\n0.1.0-rc3) [23]. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicatespullandpush.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference. The rule for dereference (let y = ∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduce abstract  operational  semantics for  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  as abstract  variables x (marked  bold  and  light  blue),\nwhich is strongly related to prophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce a pre-value, which is defined as follows:\n(pre-value) ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  an abstract  configuration C,  which  is  defined  as  follows.  Here,  an\nabstract stack frame F maps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  to C,  which  does  not  directly  affect  the  execution. A is  a global  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n≠α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operational Semantics). The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here, T∼\nA\nU means T≤\nA\nU ∧ U≤\nA\nT (the type\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ in X\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for any x.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression). For any\nΠ and C such that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists C\n′\nsatisfying C →\nΠ\nC\n′\n.\nProof. Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to each active variable, abstract vari-\nables do not appear except in the form 〈ˆv,x〉. □\nLemma 1  (Safety on the Abstract Configuration is Preserved). For any\nΠ and C, C\n′\nsuch that safe\nΠ\n(C) and C →\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof. Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening. Type weakening (x as T) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it is not guarded by any mut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’, α can get earlier and T can be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’, α does not change and T can be weakened.\nMutable (Re)borrow. When we perform let my = mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsome x\n◦\nand T.\nRelease of a Mutable Reference. When we release a mutable reference mx, whose\npre-value is of form 〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since ˆv moves to another variable, the type of each abstract\nvariable in ˆv may change into an equivalent type.\nOwnership Weakening. Similar to a release of a mutable reference.\nSwap. Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying. When  data  of  type T is  copied, T:copy holds,  which  ensures  that\neach mutable reference mut\nα\nU in T is guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference. A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let (∗mx\n0\n,∗mx\n1\n) = ∗mx’, variant destruction\n‘match ∗mx {inj\n0\n∗my → goto L\n0\n,···}’, and dereference ‘let mx = ∗mpx’. When a\nmutable reference mx with a pre-value 〈ˆv,x〉 is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable. Just  after  we  eliminate  a  local  life-\ntime variable α (‘now α’), since there remains no lifetime variable earlier than\nα in the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriate n).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’. □\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we call SLDC resolution (Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as a top-down construction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nA resolutive  configuration K and a pre-resolutive  configuration\nˆ\nK have the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere, θ maps variables to patterns. For a term t, tθ stands for t with each x re-\nplaced with θ(x). For a formula φ=f(t\n0\n,...,t\nn−1\n), φθ stands for f(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of a call stack. q is a pattern that represents the returned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(K can  change  into K\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation from K to\nK\n′\n.\n1.  The ‘stack’ part of K should be non-empty. Let K=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake from Φ any CHC that unifies with the head of the stack of K. That is,\nΦ is of form ∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma. Interestingly, there is a bisimulation between the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF ⇝\nθ\nf,L,r\nˇφ (the abstract stack frame F can be translated into the elementary\nformula  ˇφ,  under θ, f, L and r)  is  defined  as  follows.  Here, θ maps  abstract\nvariables to (normal) variables. ˆvθ is the value made from ˆv by replacing each x\nwith θ(x). r is the abstract variable for taking the result.\nthe items of F are enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF ⇝\nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let ∗y = rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow, C ⇝\nΠ\nK is defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh in C\nF\n0\n⇝\nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor any i∈[n], F\ni+1\n+{(x\ni+1\n,r\ni\n)} ⇝\nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC ⇝\nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution). Take any Π, C and K satisfying C ⇝\nΠ\nK.\nFor  any C\n′\nsatisfying C →\nΠ\nC\n′\n,  there  exists  some K\n′\nsatisfying K →\n(|Π|)\nK\n′\nand C\n′\n⇝\nΠ\nK\n′\n. Likewise, for any K\n′\nsatisfying K→\n(|Π|)\nK\n′\n, there exists some C\n′\nsatisfying C →\nΠ\nC\n′\nand C\n′\n⇝\nΠ\nK\n′\n.\nProof. Straightforward. □\nAOS-based Model and the Equivalence Theorem. Take any Π and simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.12.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.12.json
new file mode 100644
index 0000000..ebd4b18
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.12.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.12"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type.  For purely safe functions this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type.  Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions’ bodies satisfy the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contract `requires` clause and check the validity of its contract’s `ensures`\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the information encoded in a function type and use it in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and, using an example, we present\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety taking `unsafe` Rust into account, while the two other works do not.  
To prove the\nsafety of Rust with `unsafe` code, the popular Progress and Preservation method is not useful.  `unsafe` Rust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules for `unsafe` code is\nnot type-safe!  That is why RustBelt follows the semantic approach using logical relations to prove the safety\nof Rust programs with `unsafe` code.  RustBelt introduces λ_Rust, a formal language close to Rust’s Mid-level\nIntermediate Representation (MIR).  Next, it provides a formal interpretation for λ_Rust’s types and typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nfor λ_Rust’s type system.  Then they prove the safety of λ_Rust using this semantic model following three steps,\nwhich have been mentioned in the RustBelt [8] paper as follows.\n1.  “Verify that the typing rules of λ_Rust are sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called the fundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is called adequacy.”\n3.  “For any library that employs `unsafe` code internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that the `unsafe` code has\nindeed been safely encapsulated by the library’s API.  In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have that syntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A type τ is `Sync`, if and only if, the type of shared references to τ, i.e. &^κ_shr τ, is `Send`.  The fact\n1\nSome details have been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the component shr for shared references.  It is not of interest in this example.\n13\n\n(&^{κ/t}_na P) ∗ [κ]_q ∗ [Na:t] ≡−∗ ▷P ∗ (▷P ≡−∗ [κ]_q ∗ [Na:t])  (4)\nListing 8:  LftL-na-acc rule from RustBelt’s lifetime logic\nthat `Cell` is not `Sync` has been reflected in RustBelt’s interpretation as follows.  The &^{κ/t}_na which has been used\nin the shr component of ⟦cell⟧ depends on the thread identifier t.  In short, `Cell`’s sharing predicate depends\non the thread identifier.  Since ⟦&^κ_shr τ⟧.own, shown in Definition 2, consists of ⟦τ⟧.shr, ⟦&^κ_shr cell⟧.own\ndepends on t as well, reflecting that shared references to `Cell` are not `Send`.\nThe interesting point in proving RustBelt’s step three about `Cell::set` is that we need full/write access to\n`Cell`’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows rule LftL-na-acc of lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resource P when we have it under a non-atomic persistent\nborrow.  Besides &^{κ/t}_na P itself, the rule requires [κ]_q and [Na:t].  Intuitively, in the `Cell::set` example if we\nprovide a witness that lifetime `'a` is alive and we are in the same thread that the `Cell` itself is, we can get our\nfull access.  But there is more than that about [κ]_q and [Na:t].  Let us explain them in order.\n[κ]_q is the lifetime logic’s lifetime token, representing that lifetime κ is alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resource P, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fraction q, such that 0 < q ≤ 1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]_1.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented in LftL-na-acc by P.\n2.  An update which takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource.  In the case of LftL-na-acc the\n(▷P ≡−∗ [κ]_q ∗ [Na:t]) part.\nIn lifetime logic, we cannot show a lifetime κ is ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunder κ.  Taking the fractions back is only possible through those updates we just mentioned, in the case of\nLftL-na-acc the (▷P ≡−∗ [κ]_q ∗ [Na:t]).  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back.  Intuitively, the aliveness of a lifetime is a credit: we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.” [8]  The same scenario of consumption and giving\nback of [κ]_q in LftL-na-acc happens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with regard to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.  Generic definitions are also still in place in MIR.  Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing this fact.  Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI.  It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR.  Using the official compiler front-end saves a lot of work and also\nprevents our tool from diverging from what exactly the Rust compiler does.  If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch.  VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s type semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST).  By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics.  A similar process of\ndefining operational semantics for λ_Rust by translating it to another language happens in RustBelt.  “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightforward.\nFor some data types, there are direct equivalents, e.g. `bool` and more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using C `struct`s with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic.  The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction⁴.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand deallocation, shown in Figure 3.  Even this simple example includes two generic functions whose definitions\nare parameterised by a type.  The instantiations of functions `new` and `is_null` used in the example are\n`std::alloc::Layout::new::<u8>()` and `std::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)` respectively.\nGeneric definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram is being done in branch `rust` in a fork of VeriFast that can be found at\nhttps://github.com/Nima-Rahimi-Foroushaani/verifast.  The current status of the code including the `alloc` example shown in\nFigure 3 is available as a Zenodo drop at https://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control withlongjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region function closures represent region-\npolymorphic functions; the region variables $\\rho_1, \\ldots, \\rho_k$ are required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair $(r,o)$ of a region name and an offset. We use $a$ to range over\naddresses and Addr to denote the set of addresses. For any address $a$, we write $r$\nof $a$ to mean the first component (i.e., the region name) of $a$. A store is a finite map\nfrom region names to regions. We use $s$ to range over stores; the set of stores is\ndenoted Store.\nA value is an address. We use $v$ to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever $a=(r,o)$ is an address, we\nwrite $s(a)$ to mean $s(r)(o)$. Similarly, we write $s+\\{(r,o) \\mapsto sv\\}$ as a shorthand for\n$s+\\{r \\mapsto (s(r)+\\{o \\mapsto sv\\})\\}$. Moreover, we define the planar domain of $s$, written\n$\\mathrm{Pdom}(s)$, to be the finite set $\\{(r,o) \\in \\mathrm{Addr} \\mid r \\in \\mathrm{Dom}(s) \\wedge o \\in \\mathrm{Dom}(s(r))\\}$. Finally,\nwe write ``$s \\setminus\\setminus \\{r\\}$'' (read: $s$ without $r$) to mean the store $s \\downarrow (\\mathrm{Dom}(s) \\setminus \\{r\\})$.\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the form $s, VE, R \\vdash e' \\to v', s'$, read: In store $s$,\nvariable environment $VE$, and region environment $R$, the target expression $e'$ evaluates\nto value $v'$ and (a perhaps modified) store $s'$.\nRule 10 is the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because $\\rho'_i \\notin \\mathrm{Dom}(R)$,\nfor some $\\rho'_i$). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x) = if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function takes two arguments, namely $\\rho_3$, which is the region where $x$ is\nlocated, and $\\rho_4$, which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system, the recursive\ncalls of fib use regions different from $\\rho_3$ and $\\rho_4$ (and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall ($\\rho_5$), then reserves space for the actual argument ($\\rho_8$), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nThe letrec stores the following cyclic region function closure in the store at\nsome new address, $a$:\n$(\\rho_3 \\rho_4, x, \\mathtt{if} \\ldots, \\{\\mathit{fib} \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2\\})$\nAssuming that $\\rho_{13}$ is bound to $r_3$, the application of fib to 15 near the end of the\nprogram stores the following function closure in the region denoted by $\\rho_{12}$:\n$(x, \\mathtt{if} \\ldots, \\{\\mathit{fib} \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2, \\rho_3 \\mapsto r_3, \\rho_4 \\mapsto r_1\\})$\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG. 2. The Fibonacci function annotated with regions. The result will be a single integer in $\\rho_1$.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that \\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2 at \\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that \\ must occur free  in ..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,. by TE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere .\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not defined C yet.) Next,\nconsider  the  point  where  the  actual  argument  4  to f has  been  stored,  the  closure\nfor f has  been  fetched  and  we  are  just  about  to  evaluate  the  body  of f.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   become R\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]] and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere .\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)],\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place in R\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvalues sv (defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 that we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect .: if get(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region \\ is  allowed,  for \\ then  denotes\n``garbage''. However, by Rule 36, if v$=(r,o) # Pdom(s) and r=R(\\) then the value\nstored  at  address v$  has  to  be  consistent  with  the  source  value, v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviates r# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note that TE does not occur in the\nconclusion of the rule: one has to ``invent'' a TE which can justify the target expres-\nsion as a compilation result of the source expression. Also, the environments E and\nVE must  be  consistent  at TE.  The  region  environment R may  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  that R and R$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on ..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\nfirst two  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  is E+[f[(x,e,E,f)] and VE\nthat have to be  consistent,  rather than just E and VE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ and R agree on .    frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ and R agree on .    frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on # (note:  not #$ ),  since ##1\n*\nimplies  that # can  be  inferred  by  an  application  of  one  of  the  rules  that  define F\nfrom premises which are themselves in 1\n*\n. Note that proving #$#F(1\nR\n) is equiv-\nalent   to   inferring #$#1\n*\n,   using   the   fixed-point   rules   for F (in   our   case:\nRules 31\u001538)  and only using premises #\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules for F rather than the rules that define F.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the syntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The resulting program, call it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for the lambda expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive function in $e_1$, the region inference steps (unification, introduction of\nletregions, etc.) are iterated, using less and less general type schemes for the recursive\nfunctions, till a fixed point is reached. This is similar in spirit to Mycroft's algorithms\nfor full polymorphic recursion [20].\nIt is possible to extend the notion of principal unifiers for types to a notion of\nprincipal unifier for region-annotated types, even though region-annotated types\ncontain effects. This relies on invariants about arrow effects that were outlined in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same underlying\nML type have a most general unifier, provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe reason for the separation of spreading and fixed-point resolution is that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate. In particular, there is a danger of arrow effects that grow ever\nlarger, as more fresh region and effect variables are generated. The division into\nspreading and fixed-point resolution solves this problem by only generating fresh\nvariables during the spreading phase. It can then be shown that the second phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus there is a\nput effect on the region where the reference resides. The assignment does not make\na copy of the stored value. Thus assignment is a constant time operation, but the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences of $\\rho_1$ in the type for := ). Thus, for values with boxed representation,\nall the different contents of the reference will be kept alive for as long as the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem and even if there are many side-effects, one can still expect reasonable\nmemory usage as long as the references are relatively short-lived. Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2. Exceptions\nOur approach here is simple-minded: exception values are put into global\nregions. Every evaluation of an exception declaration gives rise to an allocation in\nsome global region. Application of a unary exception constructor to an argument\nforces the argument to be in global regions as well. Thus if one constructs many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses nullary constructors only, there is only going to be one allocation for each\nevaluation of each exception declaration.\n11.3. Recursive Datatypes\nSo far, every type constructor has been paired with one region variable. For\nvalues of recursive datatypes, additional region variables, the so-called auxiliary\nregion variables, are associated with type constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A third possibility is to put a limit on the number of auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study of different solutions has not been conducted.\n12. STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen in the ML Kit [5]. Neither of these systems uses garbage collection. This\nsection records some of the experience gained from these systems, with special\nemphasis on how details of the region inference rules influence memory management.\nWe first illustrate consequences of the region inference rules by a series of\nsmall, but complete, examples. Then we report a few results from larger benchmarks\nrun on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated into letrec and val into let.\n12.1. Small Examples\nThe examples are grouped according to the general point they are intended to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally speaking, polymorphic region recursion favours recursive functions\nthat have a balanced call tree (as opposed to an iterative computation, where the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of the Fibonacci function:\nfun fib n = if n <= 1 then 1 else fib(n-2) + fib(n-1)\nval fib15 = fib 15;\nDue to region polymorphism, the two recursive calls of fib use different regions,\nlocal to the body (see Fig. 2). The memory usage appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5 shows a region profile for the original version of the Knuth-Bendix benchmark,\nproduced using Hallenberg's region profiler [10a]. The region profiler can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram is then changed, to make it more region friendly. Interestingly, transformations\nthat are good for region inference often are good for SML/NJ too (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG. 5. Region profile for Knuth-Bendix before optimisations. One region ($\\rho_{122}$) of unbounded size,\nindicated as r122inf in the picture, is responsible for most of the space leak. Additional profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for all values in that region.\nFIG. 6. Region profile for Knuth-Bendix after optimizations.\n169\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG. 7. Comparison between stand-alone programs created with the ML Kit (using the HP PA-RISC\ncode generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr'' means improved for region inference. All times are user time in seconds on an HP 9000 s700,\nmeasured using the unix time command. Space is maximal resident memory in kilobytes, measured\nwith top, and includes code and runtime system. All values are average over three runs.\nKnuth-Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time, and this is good for garbage collection too. The\nregion profile of the improved Knuth-Bendix completion is shown in Fig. 6; see\nFig. 7 for a comparison with SML of New Jersey, version 93.\n12.3. Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms, the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive recycling of memory possible). The Kit performs optimisations which\nseparate regions. These include replacing let x = $e_1$ in $e_2$ end by $e_2[e_1/x]$ in\ncases where $e_1$ is a syntactic value and either x occurs at most once in $e_2$ or the\nvalue denoted by $e_1$ is not larger than some given constant. Another optimisation,\nwhich is implemented, is specialisation of curried functions, as in the string2\nexample above; however, the Kit does not attempt to turn functions into region\nendomorphisms (which was the last thing we did in string2). As a matter\nof principle, the Kit avoids optimisations which can lead to increased memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n$\\forall \\vec{\\rho}\\,\\vec{\\alpha}\\,\\vec{\\epsilon}.\\ \\mu_1 \\xrightarrow{\\epsilon.\\varphi} \\mu_2$\nand $\\varphi$ contains an atomic effect of the form put($\\rho$), where $\\rho$ is not amongst the\nbound region variables $\\vec{\\rho}$, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas a side-effect at the implementation level. This can easily happen even when\nthere are no side-effects in the source program.\nIn such cases, the implementation simply issues a short warning. This turns out\nto be very useful in practice.\nAnother usage of the inferred information is the ability to detect dead code.\nConsider the rule for letregion (Rule 27). If put($\\rho$) $\\in \\varphi$ and get($\\rho$) $\\notin \\varphi$ then whatever\nvalue that was put into $\\rho$ was never used. For example, this can detect that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4. Conclusion\nAs has been shown with the previous examples, it is not the case that every\nML program automatically runs well on a stack of regions. Often, one has to\nprogram in a region-friendly style, aided by profiling tools to find space leaks.\nThus, programming with regions is different from usual ML programming, where\none relies on a garbage collector for memory management. On the other hand,\nthe region discipline offers what we feel is an attractive combination of the\nconvenience of an expressive programming language and the ability to reason\nabout the time and space performance of programs.  The  relationship  between
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample,Vec[58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice&mut[T]/&[T]can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCell<T>address to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory  leaks[52] caused for\nexample  by  combination  ofRefCellandRc[57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  ofRcetc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  toolRustHorn(available  athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groupssimpleandbmcwere  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare  based  on  the  examples  that  have  appeared  in§1  and§3.4.  The  group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groupslistsandtreesfeature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program of it explained in§3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  translate  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicatespullandpush.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference. The rule for dereference (let y = ∗x) may seem com-\nplicated at a glance. However, this is just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe introduce abstract operational semantics for COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed as abstract variables x (marked bold and light blue),\nwhich are strongly related to prophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce a pre-value, which is defined as follows:\n(pre-value) ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as an abstract configuration C, which is defined as follows. Here, an\nabstract stack frame F maps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion to C, which does not directly affect the execution. A is a global lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here, T∼\nA\nU means T≤\nA\nU ∧ U≤\nA\nT (the type\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}    T∼\nA\nT\n′\n    α≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ in X\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for any x.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)    C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression). For any\nΠ and C such that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists C\n′\nsatisfying C →\nΠ\nC\n′\n.\nProof. Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to each active variable, abstract vari-\nables do not appear except in the form 〈ˆv,x〉. □\nLemma 1  (Safety on the Abstract Configuration is Preserved). For any\nΠ and C, C\n′\nsuch that safe\nΠ\n(C) and C →\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof. Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening. Type weakening (x as T) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it is not guarded by any mut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’, α can get earlier and T can be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’, α does not change and T can be weakened.\nMutable (Re)borrow. When we perform let my = mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsome x\n◦\nand T.\nRelease of a Mutable Reference. When we release a mutable reference mx, whose\npre-value is of form 〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since ˆv moves to another variable, the type of each abstract\nvariable in ˆv may change into an equivalent type.\nOwnership Weakening. Similar to a release of a mutable reference.\nSwap. Swap (swap(∗x,∗y)) actually does not alter the abstract variable sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying. When data of type T is copied, T:copy holds, which ensures that\neach mutable reference mut\nα\nU in T is guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision of a Mutable Reference. A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let (∗mx\n0\n,∗mx\n1\n) = ∗mx’, variant destruction\n‘match ∗mx {inj\n0\n∗my → goto L\n0\n,···}’, and dereference ‘let mx = ∗mpx’. When a\nmutable reference mx with a pre-value 〈ˆv,x〉 is subdivided, the two items of form\ngive\nα\n(x::T) and take\n†β\n(x::T\n′\n) are accordingly ‘subdivided’ in the abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.2.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.2.json
new file mode 100644
index 0000000..1f6c326
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.2.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.2"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer. self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime 'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\n⟦&^κ_shr τ⟧.size := 1  (1)\n⟦&^κ_shr τ⟧.own(t, v̄) := ∃ℓ. v̄ = [ℓ] ∗ ⟦τ⟧.shr(⟦κ⟧, t, ℓ)  (2)\n⟦cell⟧.shr(κ, t, ℓ) := &^{κ/t}_na (∃v̄. ℓ ↦ v̄ ∗ ⟦int⟧.own(t, v̄))  (3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference to Cell type\n1\n2.  The parameter self carries an aligned non-null address.\n3.  There are enough bytes to store a Cell value allocated at the address stored in self.\n4.  There is a valid Cell value stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. However,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each type τ is interpreted by a tuple ⟦τ⟧ = (size, own, shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting the &'a Cell type.\nDefinition 1 of the size value for shared references to τ under lifetime κ shows that all shared references\nare of size 1 memory unit.  Definition 2 of the own predicate for shared references to τ under lifetime κ has an\ninteresting meaning.  Its body uses the shr component of the interpretation of type τ, i.e. ⟦τ⟧.shr(⟦κ⟧, t, ℓ).\nThis represents the fact that having a shared reference to a type τ has different meanings depending on τ.\nThat is why RustBelt defines a shr component for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A type τ is Sync, if and only if, the type of shared references to τ, i.e.  &^κ_shr τ, is Send.  The fact\n1\nSome details have been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the component shr for shared references.  It is not of interest in this example.\n13\n\n(&^{κ/t}_na P) ∗ [κ]_q ∗ [Na:t] ≡−∗ ▷P ∗ (▷P ≡−∗ [κ]_q ∗ [Na:t])  (4)\nListing 8: LftL-na-acc rule from RustBelt’s lifetime logic\nthat Cell is not Sync has been reflected in RustBelt’s interpretation as follows.  The &^{κ/t}_na which has been used\nin the shr component of ⟦cell⟧ depends on the thread identifier t.  In short, Cell’s sharing predicate depends\non  the  thread  identifier.   Since ⟦&^κ_shr τ⟧.own,  shown  in  Definition  2,  consists  of ⟦τ⟧.shr, ⟦&^κ_shr cell⟧.own\ndepends on t as well, reflecting that shared references to Cell are not Send.\nThe interesting point in proving RustBelt’s step three about Cell::set is that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthese are the resources held by a non-atomic persistent borrow.  Listing 8 shows rule LftL-na-acc of the lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resource P when  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &^{κ/t}_na P itself, the rule requires [κ]_q and [Na:t].  Intuitively, in the Cell::set example, if we\nprovide a witness that lifetime 'a is alive and we are in the same thread as the Cell itself, we can get our\nfull access.  But there is more than that about [κ]_q and [Na:t].  Let us explain them in order.\n[κ]_q is the lifetime logic’s lifetime token, representing that lifetime κ is alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with regard to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the fact that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  the  Rust  compiler  actually  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the executing and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, the semantics of Rust’s types,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for λ_Rust by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stack G is\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ₁ × τ₂) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nThe proto column lists the number of region type annota-\ntions that were necessary in function prototypes; the rnew\ncolumn lists the number of uses of rnew, and the region col-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectors within a language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that code must free a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C. Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has no a priori restrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC: grobner and tile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables $\\rho_1, \\ldots, \\rho_k$ are required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair $(r, o)$ of a region name and an offset. We use $a$ to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address $a$,  we  write $r$\nof $a$ to mean the first component (i.e., the region name) of $a$. A store is a finite map\nfrom  region  names  to  regions.  We  use $s$ to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use $v$ to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever $a = (r, o)$ is an address, we\nwrite $s(a)$  to  mean $s(r)(o)$.  Similarly,  we  write $s + \\{(r, o) \\mapsto sv\\}$ as  a shorthand  for\n$s + \\{r \\mapsto (s(r) + \\{o \\mapsto sv\\})\\}$.  Moreover,  we  define  the planar  domain  of  $s$,  written\n$\\mathrm{Pdom}(s)$,  to  be  the  finite  set $\\{(r, o) \\in \\mathrm{Addr} \\mid r \\in \\mathrm{Dom}(s) \\wedge o \\in \\mathrm{Dom}(s(r))\\}$.  Finally,\nwe write ``$s \\setminus\\setminus \\{r\\}$''  (read: $s$ without $r$)  to mean the store $s \\downarrow (\\mathrm{Dom}(s) \\setminus \\{r\\})$.\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form $s, VE, R \\vdash e' \\rightarrow v', s'$,  read: In  store  $s$,\nvariable environment $VE$, and region environment $R$, the target expression $e'$ evaluates\nto value $v'$ and (a perhaps  modified) store $s'$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because $\\rho'_i \\notin \\mathrm{Dom}(R)$,\nfor some $\\rho'_i$). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x) = if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely $\\rho_3$,  which  is  the  region  where x is\nlocated, and $\\rho_4$,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from $\\rho_3$ and $\\rho_4$ (and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  ($\\rho_5$),  then  reserves  space  for  the  actual  argument  ($\\rho_8$),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, $a$:\n$(\\rho_3 \\rho_4, x, \\mathtt{if} \\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2\\})$\nAssuming that $\\rho_{13}$ is bound to $r_3$, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by $\\rho_{12}$:\n$(x, \\mathtt{if} \\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2, \\rho_3 \\mapsto r_3, \\rho_4 \\mapsto r_1\\})$\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in $\\rho_1$.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,. by TE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not defined C yet.) Next,\nconsider  the  point  where  the  actual  argument  4  to f has  been  stored,  the  closure\nfor f has  been  fetched  and  we  are  just  about  to  evaluate  the  body  of f.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  rules Rule 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  that R and R$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  is E+[f[(x,e,E,f)] and VE\nthat have to be  consistent,  rather than just E and VE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  define F\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules for F rather than the rules that define F.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The resulting program, call it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for the lambda expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive function in $e_1$, the region inference steps (unification, introduction of\nletregions, etc.) are iterated, using less and less general type schemes for the\nrecursive functions, till a fixed point is reached. This is similar in spirit to Mycroft's\nalgorithms for full polymorphic recursion [20].\nIt is possible to extend the notion of principal unifiers for types to a notion of\nprincipal unifier for region-annotated types, even though region-annotated types\ncontain effects. This relies on invariants about arrow effects that were outlined in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same\nunderlying ML type have a most general unifier, provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe reason for the separation of spreading and fixed-point resolution is that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate. In particular, there is a danger of arrow effects that grow ever\nlarger, as more fresh region and effect variables are generated. The division into\nspreading and fixed-point resolution solves this problem by only generating fresh\nvariables during the spreading phase. It can then be shown that the second phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases where that function in the fixed-point resolution which is responsible for\nforming type schemes is refused the opportunity to quantify region and effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation simply prints a warning about the possible loss of principal types\nand continues with a less-than-principal type scheme. Fortunately, this happens\nrather infrequently in practice, and since the soundness result of the present paper\nshows the correctness for all derivations $TE \\vdash e \\Rightarrow e' : \\mu, \\varphi$, safety is not violated.\n11. LANGUAGE EXTENSIONS\nIn this section we outline some of the extensions that have been made to the\nregion inference rules in order to handle references, exceptions, and recursive\ndatatypes in the ML Kit.\n11.1. References\nAssume primitives ref, !, and := for creating a reference, de-referencing, and\nassignment, respectively. For the purpose of region inference, these can be treated\nas variables with the following type schemes:\nref : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon.\\ (\\alpha, \\rho_1) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2)\\}} ((\\alpha, \\rho_1)\\ \\mathrm{ref}, \\rho_2)$\n! : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon.\\ ((\\alpha, \\rho_1)\\ \\mathrm{ref}, \\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{get}(\\rho_2)\\}} (\\alpha, \\rho_1)$\n:= : $\\forall \\alpha \\rho_1 \\rho_2 \\rho_3 \\rho_4 \\epsilon.\\ (((\\alpha, \\rho_1)\\ \\mathrm{ref}, \\rho_2) * (\\alpha, \\rho_1), \\rho_3) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2), \\mathbf{put}(\\rho_4)\\}} (\\mathrm{unit}, \\rho_4)$.\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe most interesting of these is assignment. The new contents of the reference is\nrepresented by a pointer (or by a word, if the value is in unboxed representation).\nThe assignment updates the reference with this pointer (or word). 
Thus there is a\nput effect on the region where the reference resides. The assignment does not make\na copy of the stored value. Thus assignment is a constant time operation, but the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences of $\\rho_1$ in the type for :=). Thus, for values with boxed\nrepresentation, all the different contents of the reference will be kept alive for as long as the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem and even if there are many side-effects, one can still expect reasonable\nmemory usage as long as the references are relatively short-lived. Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2. Exceptions\nOur approach here is simple-minded: exception values are put into global\nregions. Every evaluation of an exception declaration gives rise to an allocation in\nsome global region. Application of a unary exception constructor to an argument\nforces the argument to be in global regions as well. Thus if one constructs many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses nullary constructors only, there is only going to be one allocation for each\nevaluation of each exception declaration.\n11.3. Recursive Datatypes\nSo far, every type constructor has been paired with one region variable. For\nvalues of recursive datatypes, additional region variables, the so-called auxiliary\nregion variables, are associated with type constructors. 
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A third possibility is to put a limit on the number of auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study of different solutions has not been conducted.\n12. STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen in the ML Kit [5]. Neither of these systems uses garbage collection. This\nsection records some of the experience gained from these systems, with special\nemphasis on how details of the region inference rules influence memory\nmanagement. We first illustrate consequences of the region inference rules by a series of\nsmall, but complete, examples. Then we report a few results from larger\nbenchmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated into letrec and val into let.\n12.1. Small Examples\nThe examples are grouped according to the general point they are intended to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally speaking, polymorphic region recursion favours recursive functions\nthat have a balanced call tree (as opposed to an iterative computation, where the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of the Fibonacci function:\nfun fib n = if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15 = fib 15;\nDue to region polymorphism, the two recursive calls of fib use different regions,\nlocal to the body (see Fig. 2). The memory usage appears in Fig. 
4.\nThe next example, called reynolds2 [5], is a depth-first search in a tree, using\na predicate to record the path from the root to the present node:\ndatatype 'a tree =\nLf\n| Br of 'a * 'a tree * 'a tree\nfun mk_tree 0 = Lf\n| mk_tree n = let val t = mk_tree(n-1)\nin Br(n, t, t)\nend\nfun search p Lf = false\n| search p (Br(x, t1, t2)) =\nif p (x) then true\nelse search (fn y => y=x orelse p (y)) t1\norelse\nsearch (fn y => y=x orelse p y) t2\nval reynolds2 = search (fn _ => false) (mk_tree 20)\nDue to the polymorphic recursion, the recursive call of search does not put the\nclosures for (fn y => y=x orelse p (y)) in the same region as p, so the space\nusage will be proportional to the depth of the tree. This leads to good memory\nutilisation (Fig. 4).\nFIG. 4. Memory used in running sample programs on the ML Kit with Regions, Version 29a3:\n(i) maximal space (in bytes) used for variable size regions (one region page is 800 bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) number\nof bytes holding values at the end of the computation (regions on stack+data in variable sized\nregions).\n165\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, called reynolds3, which uses a list\nto represent the path. 
It is obtained by replacing the search function of\nreynolds2 by\nfun member(x, []) = false\n| member(x, x' :: rest) =\nx=x' orelse member(x, rest)\nfun search p Lf = false\n| search p (Br(x, t1, t2)) =\nif member(x, p) then true\nelse search (x :: p) t1 orelse\nsearch (x :: p) t2\nval reynolds3 = search [] (mk_tree 20)\nAs we saw in Section 11, region inference does not distinguish between a list and\nits tail, so all cons cells (one for each node in the tree) are put in the same region.\nThis gives poor memory utilisation, the difference from reynolds2 being\nexponential in the depth of the tree (Fig. 4). More generally, in connection with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times of a value v of that type and other values of the same type contained\nin v.\n12.1.2. Tail Recursion\nAnother common pattern of computation is iteration. This is best implemented\nusing a recursive function whose type scheme takes the form $\\forall \\vec{\\alpha} \\vec{\\rho} \\vec{\\epsilon}.(\\mu \\xrightarrow{\\epsilon.\\varphi} \\mu)$ (note\nthat the argument and result types are the same, even after region annotation).\nSuch a function is called a region endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0)) = p\n| sum(acc, n) = sum(n+acc, n-1)\nval sumit = #1(sum(0, 100));\nIn ML, all functions in principle take one argument, in this case a tuple, and\nthat is how it is implemented in the ML Kit. One might think that 100 pairs\nwould pile up in one region; however, an analysis called the storage mode analysis\n[5] discovers that the region can be reset just before each pair is written, so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof the number of iterations, in this example. 
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5 shows a region profile for the original version of the Knuth-Bendix\nbenchmark, produced using Hallenberg's region profiler [10a]. The region profiler can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram is then changed, to make it more region friendly. Interestingly,\ntransformations that are good for region inference often are good for SML/NJ too (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG. 5. Region profile for Knuth-Bendix before optimisations. One region ($\\rho_{122}$) of unbounded size,\nindicated as r122inf in the picture, is responsible for most of the space leak. Additional profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for all values in that region.\nFIG. 6. Region profile for Knuth-Bendix after optimizations.\n169\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG. 7. Comparison between stand-alone programs created with the ML Kit (using the HP\nPA-RISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr'' means improved for region inference. All times are user time in seconds on an HP 9000 s700,\nmeasured using the unix time command. Space is maximal resident memory in kilobytes, measured\nwith top, and includes code and runtime system. All values are average over three runs.\nKnuth-Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time, and this is good for garbage collection too. The\nregion profile of the improved Knuth-Bendix completion is shown in Fig. 6; see\nFig. 7 for a comparison with SML of New Jersey, version 93.\n12.3. Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms, the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive recycling of memory possible). The Kit performs optimisations which\nseparate regions. These include replacing let x = $e_1$ in $e_2$ end by $e_2[e_1/x]$ in\ncases where $e_1$ is a syntactic value and either x occurs at most once in $e_2$ or the\nvalue denoted by $e_1$ is not larger than some given constant. Another optimisation,\nwhich is implemented, is specialisation of curried functions, as in the string2\nexample above; however, the Kit does not attempt to turn functions into region\nendomorphisms (which was the last thing we did in string2). As a matter\nof principle, the Kit avoids optimisations which can lead to increased memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n$\\forall \\vec{\\rho} \\vec{\\alpha} \\vec{\\epsilon}.\\ \\mu_1 \\xrightarrow{\\epsilon.\\varphi} \\mu_2$\nand $\\varphi$ contains an atomic effect of the form $\\mathbf{put}(\\rho)$, where $\\rho$ is not amongst the\nbound region variables $\\vec{\\rho}$, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas a side-effect at the implementation level. This can easily happen even when\nthere are no side-effects in the source program.\nIn such cases, the implementation simply issues a short warning. This turns out\nto be very useful in practice.\nAnother usage of the inferred information is the ability to detect dead code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then\nwhatever value that was put into $\\rho$ was never used. For example, this can detect that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4. Conclusion\nAs has been shown with the previous examples, it is not the case that every\nML program automatically runs well on a stack of regions. Often, one has to\nprogram in a region-friendly style, aided by profiling tools to find space leaks.\nThus, programming with regions is different from usual ML programming, where\none relies on a garbage collector for memory management. On the other hand,\nthe region discipline offers what we feel is an attractive combination of the\nconvenience of an expressive programming language and the ability to reason\nabout the time and space performance of programs. 
The relationship between\nthe abstract model of the regions presented in this paper and the concrete\nimplementation is close enough that one can use the abstract model—combined\nwith the profiling tools mentioned earlier—to tune programs, often resulting in very\nspace efficient programs that are executed as written, with no added costs of\nunbounded size.\nAPPENDIX A: EXAMPLE THREE-ADDRESS CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwith V. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn (stdArg, stdClos, stdRes). In this example, the compiler discovers\nthat all regions can be represented on the stack; in other cases, letregion and\nend translate into calls of runtime system procedures that resemble lightweight\nmalloc and free operations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  define f^COS_Π as  the  pred-\nicate  (on  values  of  sorts  (|T_0|), ..., (|T_{n−1}|), (|U|)  if f’s  input/output  types  are\nT_0, ..., T_{n−1}, U) given by the following rule.\nC_0 →_Π ··· →_Π C_N    final_Π(C_N)    C_0 = [f,entry] F | H    C_N = [f,L] F′ | H′\nsafe_H(F :: Γ_{Π,f,entry} | {(x_i, v_i) | i∈[n]})    safe_{H′}(F′ :: Γ_{Π,f,L} | {(y, w)})\n――――――――――――――――――――\nf^COS_Π(v_0, ..., v_{n−1}, w)\nΓ_{Π,f,L}: the variable context for the label L of f in the program Π\nCorrectness  Theorem. Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation). For any program\nΠ and simple function f in Π, f^COS_Π is equivalent to M^least_{(|Π|)}(f_entry).\nProof. The details are presented in Appendix C. We outline the proof below.\nFirst, we introduce abstract operational semantics (Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value with abstract  variables, which is strongly related to prophecy  variables\n(see §5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduce SLDC  resolution (Appendix C.3) for CHC systems and\nfind a bisimulation between abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that the AOS-based  model, defined analogously\nto  the  COS-based  model,  is equivalent to  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find a bisimulation between concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nis equivalent to the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation. □\n\n18 Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we just check a' = a and cut\noff execution branches that do not pass the check.\nFor example, take_max/inc_max in §1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses a random  guess and ‘assume (a' = a)’\nexpresses a check. The original problem “Does inc_max never return false?”\nis reduced to the problem “Does main never fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs. We have to care about the following points in\nmodeling closures: (i) A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased; (ii) A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii) A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec at Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25, 26\nImportantly, at the very time we take a mutable reference 〈a, a_◦〉 from a ref-cell,\nthe data at the array should be updated into a_◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\ninto  CHCs  in  the  SMT-LIB 2 format  by  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23].\nBoth RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leverages borrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in §3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in §3.5.\nVerification  using  Ownership. Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunder borrow-based ownership like Rust was little studied before our work.\nProphecy  Variables. Our idea of taking a future value to represent a mutable\nreference is linked to the notion of prophecy variables [1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in §3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26 Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments. This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci. 82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  Mérida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6_7\n3.  Astrauskas,  V.,  Müller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018).\nhttps://doi.org/10.3929/ethz-b-000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535.\nhttps://doi.org/10.1007/978-3-030-01090-4_32\n5.  Barnett, M., Fähndrich, M., Leino, K.R.M., Müller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM 54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9_2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Zürich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bodík,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4_14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst. 29(3), 17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un système de types pragmatique pour la vérification déductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),\nhttps://tel.archives-ouvertes.fr/tel-01533090\n\n28 Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic 16(1), 14–21 (1951), http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),\nhttp://www.usenix.org/publications/library/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL 2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program. 28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  Rümmer,  P.,  Schäf,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.4.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.4.json
new file mode 100644
index 0000000..93eab81
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.4.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.4"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions needed to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  to  diverge  from  what  exactly  the  Rust  compiler  is.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  forλ\nRust\nby  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics ofλ\nRust\nis given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control withlongjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables $\\rho_1, \\ldots, \\rho_k$ are required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write $s + \\{(r,o) \\mapsto sv\\}$ as  a shorthand  for\n$s + \\{r \\mapsto (s(r) + \\{o \\mapsto sv\\})\\}$.  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set $\\{(r,o) \\in \\mathrm{Addr} \\mid r \\in \\mathrm{Dom}(s) \\wedge o \\in \\mathrm{Dom}(s(r))\\}$.  Finally,\nwe write ``$s \\setminus\\setminus \\{r\\}$''  (read: s  without r)  to mean the store $s \\downarrow (\\mathrm{Dom}(s) \\setminus \\{r\\})$.\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form $s, VE, R \\vdash e' \\rightarrow v', s'$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression $e'$ evaluates\nto value $v'$ and (a perhaps  modified) store $s'$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because $\\rho'_i \\notin \\mathrm{Dom}(R)$,\nfor some $\\rho'_i$). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwith letrec need not be  recursive,  so  one  can also use the letrec construct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditions $r = R(\\rho)$, $o \\notin \\mathrm{Dom}(s(r))$);  the\nenvironment $VE' = VE + \\{f \\mapsto (r,o)\\}$ is   stored   in   the   region   function   closure\n$\\langle \\rho_1, \\ldots, \\rho_k, x, e_1, VE', R\\rangle$,  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference to f in $e_1$ will then yield the region function closure itself,  by\nRule 10, as desired (since letrec introduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure $\\langle x, e, VE_0, R_0\\rangle$,   even   if   the   operator   expression   is   of   the\nform $f[\\rho'_1, \\ldots, \\rho'_k]\\ \\texttt{at}\\ \\rho$.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x) = if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely $\\rho_3$,  which  is  the  region  where x is\nlocated, and $\\rho_4$,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from $\\rho_3$ and $\\rho_4$ (and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  ($\\rho_5$),  then  reserves  space  for  the  actual  argument  ($\\rho_8$),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n$\\langle \\rho_3\\rho_4, x, \\texttt{if}\\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2\\}\\rangle$\nAssuming that $\\rho_{13}$ is bound to $r_3$, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by $\\rho_{12}$:\n$\\langle x, \\texttt{if}\\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2, \\rho_3 \\mapsto r_3, \\rho_4 \\mapsto r_1\\}\\rangle$\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in $\\rho_1$.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for $\\lambda y.(\\texttt{\\#1}\\ x, y)\\ \\texttt{at}\\ \\rho_1$ so  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding for x is not destroyed when we leave the scope of x and $\\rho_6$ and hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthere exist e$\n1\n,+\n2\nand.\n0\nsuch that TE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)] from which the desired result\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 that we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.: if get(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\nfirst two  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied,TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  Rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,v,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R$)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  either\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  the  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nputeffect on the region where the reference resides. The assignment does not make\na  copy  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of\\\n1\nin the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region\\\n122\nin Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-calledauxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfunis translated  intoletrecandvalintolet.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n&2)+fib(n&1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  offibuse  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition\\does not occur free in+\n2\n, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule forletregion(Rule 27). Ifput(\\)#.andget(\\)\u0012.then what-\never  value  that  was  put  into\\was  never  used.  For  example,  this  can  detect  that\nthe functionsfandgbelow are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn xO3)(fn( )Og5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25 26\nImportantly, at the very time we take a mutable reference 〈a, a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory  leaks [52] caused for\nexample  by  the  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  in  particular  recursion  and\nrecursive types.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.5.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.5.json
new file mode 100644
index 0000000..478fa1d
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.5.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.5"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer. self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime 'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\n⟦&^{κ}_{shr} τ⟧.size := 1  (1)\n⟦&^{κ}_{shr} τ⟧.own(t, υ) := ∃ℓ. υ = [ℓ] ∗ ⟦τ⟧.shr(⟦κ⟧, t, ℓ)  (2)\n⟦cell⟧.shr(κ, t, ℓ) := &^{κ/t}_{na} (∃υ. ℓ ↦ υ ∗ ⟦int⟧.own(t, υ))  (3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference to the Cell type\n1\n2.  The parameter self carries an aligned non-null address.\n3.  There are enough bytes to store a Cell value allocated at the address stored in self.\n4.  There is a valid Cell value stored there.\n5.  The memory region does not overlap with any memory region owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. However,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each type τ is interpreted by a tuple ⟦τ⟧ = (size, own, shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting the &'a Cell type.\nDefinition 1 of the size value for shared references to τ under lifetime κ shows that all shared references\nare of size 1 memory unit.  Definition 2 of the own predicate for shared references to τ under lifetime κ has an\ninteresting meaning.  Its body uses the shr component of the interpretation of type τ, i.e. ⟦τ⟧.shr(⟦κ⟧, t, ℓ).\nThis represents the fact that having a shared reference to a type τ has different meanings depending on τ.\nThat is why RustBelt defines a shr component for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicate own for our shared reference to a Cell, we need the definition of predicate shr of\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it, we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies on borrows, which are proposition constructors.  The notation &^{κ/t}_{na} ... is a kind of borrow named\nnon-atomic persistent borrow that represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  the Cell type.   Let  us  explore  the  information  this  borrow  and  the lifetime  logic  rules\nrepresent about Cell.  We need to know about them to explain the MSE of Cell::set.\nRecall that the type Cell allows clients to mutate its contents through a shared reference.  That happens\nby applying an unsafe superpower in its set method.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keep Cell usages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code line impl !Sync for Cell {} means values of type Cell are not Sync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type &'a Cell are\nnot Send, i.e.  shared references to values of type Cell are not send-able to other threads.  Moreover, no public\nfunction in Cell leaks a deep reference to its contents.  These facts together prevent concurrent accesses to\nthe memory owned by a Cell, and the safe world can use Cell without worrying about data races.\nIn RustBelt a type τ is Send, if and only if, the ⟦τ⟧.own(t, υ) definition does not depend on the thread\nidentifier t.  
A type τ is Sync, if and only if, the type of shared references to τ, i.e.  &^{κ}_{shr} τ, is Send.  The fact\n1\nSome details have been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the component shr for shared references.  It is not of interest in this example.\n13\n\n(&^{κ/t}_{na} P) ∗ [κ]_q ∗ [Na:t] ≡−∗ ▷P ∗ (▷P ≡−∗ [κ]_q ∗ [Na:t])  (4)\nListing 8:  LftL-na-acc rule from RustBelt’s lifetime logic\nthat Cell is not Sync has been reflected in RustBelt’s interpretation as follows.  The &^{κ/t}_{na} which has been used\nin the shr component of ⟦cell⟧ depends on the thread identifier t.  In short, Cell’s sharing predicate depends\non  the  thread  identifier.   Since ⟦&^{κ}_{shr} τ⟧.own,  shown  in  Definition  2,  consists  of ⟦τ⟧.shr, ⟦&^{κ}_{shr} cell⟧.own\ndepends on t as well, reflecting that shared references to Cell are not Send.\nThe interesting point in proving RustBelt’s step three about Cell::set is that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows rule LftL-na-acc of the lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resource P when  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &^{κ/t}_{na} P itself, the rule requires [κ]_q and [Na:t].  Intuitively, in the Cell::set example, if we\nprovide a witness that lifetime 'a is alive and we are in the same thread as the Cell itself, we can get our\nfull access.  But there is more to [κ]_q and [Na:t] than that.  Let us explain them in order.\n[κ]_q is the lifetime logic’s lifetime token, representing that lifetime κ is alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resource P, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fraction q, such that 0 < q ≤ 1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]_1.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented in LftL-na-acc by P.\n2.  An update which  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  of LftL-na-acc, the\n(▷P ≡−∗ [κ]_q ∗ [Na:t])\npart.\nIn the lifetime logic, we cannot show that a lifetime κ has ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunder κ.  Taking the fractions back is only possible through the updates we just mentioned, in the case of\nLftL-na-acc the\n(▷P ≡−∗ [κ]_q ∗ [Na:t]).  Those updates always need the resources they have handed out\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit:  we  borrow  access  to\nresources relying on that lifetime, and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]_q in LftL-na-acc happens for [Na:t] too.  
It means at return points we need [Na:t] back, and to\nhave that again we need to give back the resource we have been granted using LftL-na-acc relying on the fact that\nwe are in thread t.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verify Cell::set’s body.  Moreover, we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor the safe abstraction Cell::set would be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binder F.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get the own component of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust should we symbolically execute, and second,\nhow can we reuse the capabilities of the existing research tool VeriFast to implement our algorithm?\n3\nTo make our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  the  Rust  compiler  actually  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for λ_Rust by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  The diffs column\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, the total column is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences of region r\n{s} and rnew. The lines column is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings is changed\nfrom char * to char ?.   We are currently experimenting\nwith  interpreting char * as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required adding ρ_H annotations to\nargv so that arguments could be stored in global variables.\nThe program that required the most changes was grobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstruct type. We therefore parameterized the struct defini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables \\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set [(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read: s  without r)  to mean the store sa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because \\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely \\\n3\n,  which  is  the  region  where x is\nlocated, and \\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from \\\n3\nand \\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that \\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by \\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in \\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  the bound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors: :\u0011,\\\u0011, and =\u0011, respec-\ntively. Two type schemes are equivalent if they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type {$ is an instance of  _,  written\n_\u001e{$,  if  there  exists  a  substitution S such  that  Supp(S) \u001fbv(_)  and S({)={$.\nWhen we want to make S explicit, we say that {$ is an instance of _ via S, written\n_\u001e{$ via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write { as  a  shorthand  for  the  simple  type  scheme \\().{,  not  to\nbe confused with the  compound  type scheme \\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nA type  environment is  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  use TE to range over type environments.\nThe semantic objects are summarised in Fig. 3. The notion of free variables extends\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free in TE if it occurs free in TE(x), for some x.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\nfirst two  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  object A but  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. The yield of S\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition 8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  that S is a region  renaming  of A with\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma 8.3. IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by Rule 34.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 37. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by Rule 36.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 38.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by induction on the depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the syntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Let o$ be an\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$\n0\n,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\n$C(R, TE^+, E^+, s^+, VE')$ w.r.t. $\\varphi_2 \\cup \\varphi'$.  (238)\nAlso,  by  (118)  and  (119),  we get\n$R$ connects $\\varphi_2 \\cup \\varphi'$ to $s^+$  (239)\n$R'$  and $R$ agree on $\\varphi_2 \\cup \\varphi'$.  (240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exist $s'$  and $v'$\nsuch that\n$s^+, VE', R' \\vdash e'_2 \\to v', s'$  (241)\n$C(R', \\mu, v, s', v')$ w.r.t. $\\varphi'$.  (242)\nFrom (241)  and  Rule 14 we get\n$s, VE, R' \\vdash \\mathtt{letrec}\\ f[\\rho_1, \\ldots, \\rho_k](x)\\ \\mathtt{at}\\ p = e'_1\\ \\mathtt{in}\\ e'_2\\ \\mathtt{end} \\to v', s'$.  (243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term, $e_0$,  say.  Then\nregion inference  is  done  in  two  phases.  First $e_0$ is  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  of  the  fully  region  annotated  target  expression.  This  phase  is\ncalled spreading.  During  spreading,  every  recursive  function $f$ of type  scheme $\\sigma_{ML}$,\nsay,  is  given  the  most  general  type  scheme $\\sigma_0$ which  has $\\sigma_{ML}$ as  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  a letrec-bound int $\\to$ int function  will\nbe  given  type  scheme $\\forall\\rho_1\\rho_2\\epsilon.(\\mathtt{int},\\rho_1)\\xrightarrow{\\epsilon.\\emptyset}(\\mathtt{int},\\rho_2)$.  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of $\\mu'$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of $\\rho_1$ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.  Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then what-\never  value  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown the soundness\nof the type system in terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs are left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike §1, preferring equalities to pattern matching.\nExample 3. Consider the following program, a variant of just_rec in §1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() { drop ma; mb } else { drop mb; ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 { drop ma; return true; }\nlet mut b = rand(); let old_b = b; intro 'b; let mb = &'b mut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb)); now 'b;\nr2 && old_b >= b\n}\nUnlike just_rec, the function linger_dec can modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  to linger_dec can\nintroduce a new lifetime 'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify that linger_dec never returns false. If we use,\nlike JustRec+ in §1.1, a predicate taking the memory states h, h′ and the stack\npointer sp, we have to discover the quantified invariant: ∀i ≤ sp. h[i] ≥ h′[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we just check a' = a and cut\noff execution branches that do not pass the check.\nFor example, take_max/inc_max in §1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses a random guess and ‘assume (a' = a)’\nexpresses a check. The original problem “Does inc_max never return false?”\nis reduced to the problem “Does main never fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs. We have to care about the following points in\nmodeling closures: (i) A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased; (ii) A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii) A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec at Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25,26\nImportantly, at the very time we take a mutable reference 〈a, a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference. The rule for dereference (let y = ∗x) may seem\ncomplicated at a glance. This is, however, only because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe introduce abstract operational semantics for COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed as abstract variables x (marked bold and light blue),\nwhich is strongly related to prophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce a pre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as an abstract configuration C, which is defined as follows. Here, an\nabstract stack frame F maps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost\ninformation to C, which does not directly affect the execution. A is a global lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here, T∼\nA\nU means T≤\nA\nU∧U≤\nA\nT (the type\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ in X\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for any x.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression). For any\nΠ and C such that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists C\n′\nsatisfying C →\nΠ\nC\n′\n.\nProof. Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to each active variable, abstract\nvariables do not appear except in the form 〈ˆv,x〉. □\nLemma 1  (Safety on the Abstract Configuration is Preserved). For any\nΠ and C, C\n′\nsuch that safe\nΠ\n(C) and C →\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof. Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening. Type weakening (x as T) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it is not guarded by any mut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’, α can get earlier and T can be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’, α does not change and T can be weakened.\nMutable (Re)borrow. When we perform let my = mutbor\nα\npx, the abstract\nvariable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsome x\n◦\nand T.\nRelease of a Mutable Reference. When we release a mutable reference mx, whose\npre-value is of form 〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since ˆv moves to another variable, the type of each abstract\nvariable in ˆv may change into an equivalent type.\nOwnership Weakening. Similar to a release of a mutable reference.\nSwap. Swap (swap(∗x,∗y)) actually does not alter the abstract variable\nsummary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying. When data of type T is copied, T: copy holds, which ensures that\neach mutable reference mut\nα\nU in T is guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision of a Mutable Reference. A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable reference mx with a pre-value 〈ˆv,x〉 is subdivided, the two items of form\ngive\nα\n(x::T) and take\n†β\n(x::T\n′\n) are accordingly ‘subdivided’ in the abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.6.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.6.json
new file mode 100644
index 0000000..75986a1
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.6.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.6"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A reference type carries much more\ninformation than a raw pointer.  self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime 'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\n⟦&^{κ}_{shr} τ⟧.size := 1    (1)\n⟦&^{κ}_{shr} τ⟧.own(t, υ) := ∃ℓ. υ = [ℓ] ∗ ⟦τ⟧.shr(⟦κ⟧, t, ℓ)    (2)\n⟦cell⟧.shr(κ, t, ℓ) := &^{κ/t}_{na} (∃υ. ℓ ↦ υ ∗ ⟦int⟧.own(t, υ))    (3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference to the Cell type\n1\n2.  The parameter self carries an aligned non-null address.\n3.  There are enough bytes to store a Cell value allocated at the address stored in self.\n4.  There is a valid Cell value stored there.\n5.  The memory region does not overlap with any memory region owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory will not be mutated by anyone.  However,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each type τ is interpreted by a tuple ⟦τ⟧ = (size, own, shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting the &'a Cell type.\nDefinition 1 of the size value for shared references to τ under lifetime κ shows that all shared references\nare of size 1 memory unit.  Definition 2 of the own predicate for shared references to τ under lifetime κ has an\ninteresting meaning.  Its body uses the shr component of the interpretation of type τ, i.e. ⟦τ⟧.shr(⟦κ⟧, t, ℓ).\nThis represents the fact that having a shared reference to a type τ has different meanings depending on τ.\nThat is why RustBelt defines a shr component for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicate own for our shared reference to a Cell, we need the definition of predicate shr of\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies on borrows, which are proposition constructors.  The notation &^{κ/t}_{na} ... is a kind of borrow named\nnon-atomic persistent borrow that represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret the Cell type.  Let us explore the information this borrow and the lifetime logic rules\nrepresent about Cell.  We need to know about them to explain the MSE of Cell::set.\nRecall that the type Cell allows clients to mutate its contents through a shared reference.  That happens\nby applying an unsafe superpower in its set method.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keep Cell usages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code line impl !Sync for Cell {} means values of type Cell are not Sync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type &'a Cell are\nnot Send, i.e.  shared references to values of type Cell are not send-able to other threads.  Moreover, no public\nfunction in Cell leaks a deep reference to its contents.  These facts together prevent concurrent accesses to\nthe memory owned by a Cell, and the safe world can use Cell without worrying about data races.\nIn RustBelt a type τ is Send, if and only if, the ⟦τ⟧.own(t, υ) definition does not depend on the thread\nidentifier t.  
A type τ is Sync, if and only if, the type of shared references to τ, i.e.  &^{κ}_{shr} τ, is Send.  The fact\n1\nSome details have been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the component shr for shared references.  It is not of interest in this example.\n13\n\n(&^{κ/t}_{na} P) ∗ [κ]_q ∗ [Na:t] ≡−∗ ▷P ∗ (▷P ≡−∗ [κ]_q ∗ [Na:t])    (4)\nListing 8:  LftL-na-acc rule from RustBelt’s lifetime logic\nthat Cell is not Sync has been reflected in RustBelt’s interpretation as follows.  The &^{κ/t}_{na} which has been used\nin the shr component of ⟦cell⟧ depends on the thread identifier t.  In short, Cell’s sharing predicate depends\non the thread identifier.  Since ⟦&^{κ}_{shr} τ⟧.own, shown in Definition 2, consists of ⟦τ⟧.shr, ⟦&^{κ}_{shr} cell⟧.own\ndepends on t as well, reflecting that shared references to Cell are not Send.\nThe interesting point in proving RustBelt’s step three about Cell::set is that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthese are the resources held by a non-atomic persistent borrow.  Listing 8 shows rule LftL-na-acc of the lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resource P when we have it under a non-atomic persistent\nborrow.  Besides &^{κ/t}_{na} P itself, the rule requires [κ]_q and [Na:t].  Intuitively, in the Cell::set example, if we\nprovide a witness that lifetime 'a is alive and that we are in the same thread as the Cell itself, we can get our\nfull access.  But there is more than that about [κ]_q and [Na:t].  Let us explain them in order.\n[κ]_q is the lifetime logic’s lifetime token, representing that lifetime κ is alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resource P, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fraction q, such that 0 < q ≤ 1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]_1.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented in LftL-na-acc by P.\n2.  An update which takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource.  In the case of LftL-na-acc, the\n(▷P ≡−∗ [κ]_q ∗ [Na:t]) part.\nIn lifetime logic, we cannot show that a lifetime κ has ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunder κ.  Taking the fractions back is only possible through those updates we just mentioned, in the case of\nLftL-na-acc the (▷P ≡−∗ [κ]_q ∗ [Na:t]).  Those updates always need back the resources they have handed\nout.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back.  Intuitively, the aliveness of a lifetime is a credit: we borrow access to\nresources relying on that lifetime, and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]_q in LftL-na-acc happens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we were granted using LftL-na-acc, relying on the fact that\nwe are in thread t.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verify Cell::set’s body.  Moreover, we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor the safe abstraction Cell::set would be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binder F.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get the own component of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction method Cell::set.\nFor all rows ŝtore = {self: ŝ, n: n̂} and p̂ath_cond = {F ⊑ â, 0 < q̂ ≤ 1}.\n# | Rust | r̂esource\n1 | fn set<'a>(...) | [Na: t̂], [â]_{q̂}, ⟦&^{â}_{shr} cell⟧.own(t̂, [ŝ])\n2 | //@open shr.own | [Na: t̂], [â]_{q̂}, ⟦cell⟧.shr(â, t̂, ŝ)\n3 | //@open cell.shr | [Na: t̂], [â]_{q̂}, &^{â/t̂}_{na} (∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ))\n4 | //@lemma lftl_na_acc | (∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ)), ((∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ)) ≡−∗ [â]_{q̂} ∗ [Na: t̂])\n5 | *value_mut_ptr = n; | (ŝ ↦ [n̂] ∗ ⟦int⟧.own(t̂, [n̂])), ((∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ)) ≡−∗ [â]_{q̂} ∗ [Na: t̂])\n6 | //@apply update s|->n | [Na: t̂], [â]_{q̂}\nTo justify the write in Cell::set we need write permission for the Cell’s content.  We can get access\nto the corresponding memory chunks by opening ⟦&^{â}_{shr} cell⟧.own(t̂, [ŝ]) to its definition, which gives us\n⟦cell⟧.shr(â, t̂, ŝ).  By opening the latter again, we would have the symbolic execution state in row number\n3 in Table 1.\nNow using LftL-na-acc, shown in Listing 8, we can get write access.  But recall that the rule also needs to\nconsume a fraction of the borrow lifetime token, i.e.  [â]_{q̂′}, and the non-atomic token bound to the current thread,\ni.e.  [Na: t̂].  Because we do not need [â] for the rest of Cell::set’s body to get access to another borrow, we\ncan just give all the fraction of [â] we have to LftL-na-acc.  After applying the rule we have the symbolic\nstate shown in row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk ŝ ↦ υ.  
The write operation\nupdates the value of the chunk, giving us the updated resource (ŝ ↦ [n̂] ∗ ⟦int⟧.own(t̂, [n̂])).  The state is\nshown in row number 5 of Table 1.  By the next statement, Cell::set returns.  Cell::set’s return type\nis not shown explicitly, which in Rust means it is (), i.e.  the unit type.  Closing ⟦()⟧.own(t̂, []) does not\nneed any resources, so we can easily close it out of thin air.  There is no destructor call happening here\neither.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case of Cell::set there is just 'a.  So we\nneed to consume back [â]_{q̂}.  By doing so we make sure that whatever resources we were granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [â]_{q̂} and [Na: t̂] back, we need to use the update\n((∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ)) ≡−∗ [â]_{q̂} ∗ [Na: t̂]) in our r̂esource.  Using the update needs consuming the\ngranted resource (∃υ. ŝ ↦ υ ∗ ⟦int⟧.own(t̂, υ)), i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder, [Na: t̂].  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for a type-checked program, if the MSE algorithm successfully executes all safe\nabstractions and the whole unsafe hierarchy of code behind them, no execution of that program will exhibit\nUB.  In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction upholds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.  Generic definitions are also still in place in MIR.  Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing this fact.  Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI.  It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR.  Using the official compiler front-end saves a lot of work and also\nprevents our tool from diverging from what the Rust compiler actually does.  If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch.  VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST).  By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics.  A similar process of\ndefining operational semantics for λ_Rust by translating it to another language happens in RustBelt.  “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectors within a language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that code must free a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C. Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has no a priori restrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC: grobner and tile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables \\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set [(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read: s  without r)  to mean the store sa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because \\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely \\\n3\n,  which  is  the  region  where x is\nlocated, and \\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from \\\n3\nand \\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that \\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by \\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in \\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for *y.(*1x,y) at \\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding for x is not destroyed when we leave the scope of x and \\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  the bound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors: :\u0011,\\\u0011, and =\u0011, respec-\ntively. Two type schemes are equivalent if they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type {$ is an instance of  _,  written\n_\u001e{$,  if  there  exists  a  substitution S such  that  Supp(S) \u001fbv(_)  and S({)={$.\nWhen we want to make S explicit, we say that {$ is an instance of _ via S, written\n_\u001e{$ via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write { as  a  shorthand  for  the  simple  type  scheme \\().{,  not  to\nbe confused with the  compound  type scheme \\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nA type  environment is  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  use TE to range over type environments.\nThe semantic objects are summarised in Fig. 3. The notion of free variables extends\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free in TE if it occurs free in TE(x), for some x.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  rules Rule 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE  must  have  f  in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof  rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  either\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  the  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive  function  in $e_1$,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of $\\rho_1$ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.  Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n-1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program, appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 :: s(i-1)\nfun length []=0\n| length(x :: xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n-1, s N)\nend\nval appel1=f(N, nil)\nHere f(n, nil) uses space $\\Theta(N^2)$, although $\\Theta(N)$ should be enough. The problem\nis that at each iteration a list of length N is created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  compute z.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n$\\lambda$-bound x will  be  allocated  throughout  the  evaluation  of  the  body  of f. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n-1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.  Higher-Order  Functions\nIf  a  function f is  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then whatever\nvalue  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  define f^COS_Π as  the  pred-\nicate  (on  values  of  sorts  (|T_0|),...,(|T_{n−1}|),(|U|)  if f’s  input/output  types  are\nT_0,...,T_{n−1},U) given by the following rule.\nC_0 →_Π ··· →_Π C_N    final_Π(C_N)    C_0 = [f,entry] F | H    C_N = [f,L] F′ | H′\nsafe_H(F :: Γ_{Π,f,entry} | {(x_i,v_i) | i∈[n]})    safe_{H′}(F′ :: Γ_{Π,f,L} | {(y,w)})\n────────────────────────────────────────\nf^COS_Π(v_0,...,v_{n−1},w)\nΓ_{Π,f,L}: the variable context for the label L of f in the program Π\nCorrectness  Theorem. Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation). For any program\nΠ and simple function f in Π, f^COS_Π is equivalent to M^least_{(|Π|)}(f_entry).\nProof. The details are presented in Appendix C. We outline the proof below.\nFirst, we introduce abstract operational semantics (Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value with abstract  variables, which is strongly related to prophecy  variables\n(see §5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduce SLDC  resolution (Appendix C.3) for CHC systems and\nfind a bisimulation between abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that the AOS-based  model, defined analogously\nto  the  COS-based  model,  is equivalent to  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find a bisimulation between concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nis equivalent to the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation. □\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25,26\nImportantly, at the very time we take a mutable reference 〈a,a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not work well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  toolRustHorn(available  athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\ninto  CHCs  in  the  SMT-LIB 2  format  by  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23].\nBoth RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicatespullandpush.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.7.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.7.json
new file mode 100644
index 0000000..a9ac2d4
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.7.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.7"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that calls create_deque\nseveral times?  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We would also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalled a function contract.  Generally, we like a function’s contract to tell us what is the weakest pre-condition,\ni.e.  set of requirements, for this function such that, if it holds, no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states such that, if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongest postcondition the function ensures is.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with regard to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute, and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  the  Rust  compiler  actually  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s type semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for  λ\nRust\nby  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ\nRust\nis given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control withlongjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables $\\rho_1, \\ldots, \\rho_k$ are required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair $(r,o)$ of a region name and an offset. We use $a$ to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address $a$,  we  write $r$\nof $a$ to mean the first component (i.e., the region name) of $a$. A store is a finite map\nfrom  region  names  to  regions.  We  use $s$ to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use $v$ to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever $a=(r,o)$ is an address, we\nwrite $s(a)$  to  mean $s(r)(o)$.  Similarly,  we  write $s+\\{(r,o) \\mapsto sv\\}$ as  a shorthand  for\n$s+\\{r \\mapsto (s(r)+\\{o \\mapsto sv\\})\\}$.  Moreover,  we  define  the planar  domain  of  $s$,  written\nPdom($s$),  to  be  the  finite  set $\\{(r,o) \\in \\mathrm{Addr} \\mid r \\in \\mathrm{Dom}(s) \\wedge o \\in \\mathrm{Dom}(s(r))\\}$.  Finally,\nwe write ``$s \\setminus\\!\\!\\setminus \\{r\\}$''  (read: $s$ without $r$)  to mean the store $s \\downarrow (\\mathrm{Dom}(s) \\setminus \\{r\\})$.\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form $s, VE, R \\vdash e' \\rightarrow v', s'$,  read: In  store  $s$,\nvariable environment $VE$, and region environment $R$, the target expression $e'$ evaluates\nto value $v'$ and (a perhaps  modified) store $s'$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because $\\rho'_i \\notin \\mathrm{Dom}(R)$,\nfor some $\\rho'_i$). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x) = if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely $\\rho_3$,  which  is  the  region  where $x$ is\nlocated, and $\\rho_4$,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from $\\rho_3$ and $\\rho_4$ (and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  ($\\rho_5$),  then  reserves  space  for  the  actual  argument  ($\\rho_8$),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, $a$:\n$(\\rho_3 \\rho_4, x, \\mathtt{if} \\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2\\})$\nAssuming that $\\rho_{13}$ is bound to $r_3$, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by $\\rho_{12}$:\n$(x, \\mathtt{if} \\ldots, \\{fib \\mapsto a\\}, \\{\\rho_1 \\mapsto r_1, \\rho_2 \\mapsto r_2, \\rho_3 \\mapsto r_3, \\rho_4 \\mapsto r_1\\})$\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in $\\rho_1$.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  maps $f_1$ and $f_2$,  we  say  that $f_2$ extends\n$f_1$, written $f_1 \\subseteq f_2$, if $\\mathrm{Dom}(f_1) \\subseteq \\mathrm{Dom}(f_2)$ and for all $x \\in \\mathrm{Dom}(f_1)$, $f_1(x)=f_2(x)$. We\nthen say that $s_2$ succeeds $s_1$, written $s_2 \\sqsupseteq s_1$ (or $s_1 \\sqsubseteq s_2$), if $\\mathrm{Dom}(s_1) \\subseteq \\mathrm{Dom}(s_2)$ and\n$s_1(r) \\subseteq s_2(r)$,  for  all $r \\in \\mathrm{Dom}(s_1)$.\nLemma 4.1. If  $s, VE, R \\vdash e \\rightarrow v, s'$ then $\\mathrm{Dom}(s) = \\mathrm{Dom}(s')$ and $s \\sqsubseteq s'$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  of $s, VE,\nR \\vdash e \\rightarrow v, s'$.  The  formula  $\\mathrm{Dom}(s)=\\mathrm{Dom}(s')$  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  of $e$ may  write  values\nin existing regions, so it is possible to have $s(r) \\subset s'(r)$, for some $r$. However, $e$ never\nremoves or overwrites  any  of the  values that are in $s$.\n4.6.  Syntactic Equality of Expressions\nLet $e'$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  in $e'$\nis written fpv($e'$). The  set of region variables  that occur free in $e'$ is frv($e'$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 that we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001f(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied,TEmust  havefin  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proofrule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied--see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself--see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by Rule 34.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 37. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by Rule 36.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 38.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the  syntax-directed\nrules  (i.e.,  rules 20-26) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)-(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The resulting program, call it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for the lambda expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive function in $e_1$, the region inference steps (unification, introduction of\nletregions, etc.) are iterated, using less and less general type schemes for the recursive\nfunctions, till a fixed point is reached. This is similar in spirit to Mycroft's algorithms\nfor full polymorphic recursion [20].\nIt is possible to extend the notion of principal unifiers for types to a notion of\nprincipal unifier for region-annotated types, even though region-annotated types\ncontain effects. This relies on invariants about arrow effects that were outlined in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same underlying\nML type have a most general unifier, provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe reason for the separation of spreading and fixed-point resolution is that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate. In particular, there is a danger of arrow effects that grow ever\nlarger, as more fresh region and effect variables are generated. The division into\nspreading and fixed-point resolution solves this problem by only generating fresh\nvariables during the spreading phase. It can then be shown that the second phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases where that function in the fixed-point resolution which is responsible for\nforming type schemes is refused the opportunity to quantify region and effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation simply prints a warning about the possible loss of principal types\nand continues with a less-than-principal type scheme. Fortunately, this happens\nrather infrequently in practice, and since the soundness result of the present paper\nshows the correctness for all derivations $TE \\vdash e \\Rightarrow e' : \\mu, \\varphi$, safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn this section we outline some of the extensions that have been made to the\nregion inference rules in order to handle references, exceptions, and recursive\ndatatypes in the ML Kit.\n11.1.  References\nAssume primitives ref, !, and := for creating a reference, de-referencing, and\nassignment, respectively. For the purpose of region inference, these can be treated\nas variables with the following type schemes:\n$\\mathtt{ref} : \\forall\\alpha\\rho_1\\rho_2\\epsilon.\\ (\\alpha,\\rho_1) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2)\\}} ((\\alpha,\\rho_1)\\ \\mathtt{ref},\\rho_2)$\n$\\mathtt{!} : \\forall\\alpha\\rho_1\\rho_2\\epsilon.\\ ((\\alpha,\\rho_1)\\ \\mathtt{ref},\\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{get}(\\rho_2)\\}} (\\alpha,\\rho_1)$\n$\\mathtt{:=} : \\forall\\alpha\\rho_1\\rho_2\\rho_3\\rho_4\\epsilon.\\ (((\\alpha,\\rho_1)\\ \\mathtt{ref},\\rho_2) * (\\alpha,\\rho_1),\\rho_3) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2),\\mathbf{put}(\\rho_4)\\}} (\\mathtt{unit},\\rho_4)$.\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe most interesting of these is assignment. The new contents of the reference is\nrepresented by a pointer (or by a word, if the value is in unboxed representation).\nThe assignment updates the reference with this pointer (or word).  
Thus there is a\nput effect on the region where the reference resides. The assignment does not make\na copy of the stored value. Thus assignment is a constant time operation, but the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences of $\\rho_1$ in the type for :=). Thus, for values with boxed representation,\nall the different contents of the reference will be kept alive for as long as the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem and even if there are many side-effects, one can still expect reasonable\nmemory usage as long as the references are relatively short-lived. Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur approach here is simple-minded: exception values are put into global\nregions. Every evaluation of an exception declaration gives rise to an allocation in\nsome global region. Application of a unary exception constructor to an argument\nforces the argument to be in global regions as well. Thus if one constructs many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses nullary constructors only, there is only going to be one allocation for each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo far, every type constructor has been paired with one region variable. For\nvalues of recursive datatypes, additional region variables, the so-called auxiliary\nregion variables, are associated with type constructors.  
For example, consider the\ndeclaration of the list datatype:\ndatatype 'a list = nil | :: of 'a * 'a list\nThe region-annotated version of the type $\\alpha\\ \\mathtt{list}$ takes the form\n$((\\alpha,\\rho_1)\\ \\mathtt{list}^{[\\rho_2]},\\rho_3)$, where $\\rho_1$ stands for a region which contains the list elements,\n$\\rho_3$ contains the spine of the list (i.e., the constructors nil and ::), and $\\rho_2$ is an\nauxiliary region which contains the pairs, to which :: is applied. Thus lists are kept\n``very boxed'': in region $\\rho_3$ every cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which :: is applied. The region\n$\\rho_2$ is called auxiliary because it holds values which are internal to the datatype declaration;\nthere will be one auxiliary region for each type constructor or product\ntype formation in each constructor in the datatype. However, all occurrences of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe type constructor being declared are put in the same region. Hence :: receives\ntype\n$\\forall\\rho_1\\rho_2\\rho_3\\alpha.\\ ((\\alpha,\\rho_1) * ((\\alpha,\\rho_1)\\ \\mathtt{list}^{[\\rho_2]},\\rho_3),\\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_3)\\}} ((\\alpha,\\rho_1)\\ \\mathtt{list}^{[\\rho_2]},\\rho_3)$.\nSequential datatype declarations pose an interesting design problem:\ndatatype t1 = C of int\ndatatype t2 = C of t1 * t1\ndatatype t3 = C of t2 * t2\n...\ndatatype t_i = C of t_{i-1} * t_{i-1}\n...\nIn the declaration of $t_i$, should one give the two occurrences of $t_{i-1}$ on the right-hand\nside the same or different regions? If one gives them the same regions, one\nintroduces unnecessary sharing; if one gives them different regions, the number of\nauxiliary region variables grows exponentially in $i$, potentially leading to slow\nregion inference.  
A third possibility is to put a limit on the number of auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study of different solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen in the ML Kit [5]. Neither of these systems uses garbage collection. This\nsection records some of the experience gained from these systems, with special\nemphasis on how details of the region inference rules influence memory management.\nWe first illustrate consequences of the region inference rules by a series of\nsmall, but complete, examples. Then we report a few results from larger benchmarks\nrun on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated into letrec and val into let.\n12.1.  Small Examples\nThe examples are grouped according to the general point they are intended to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally speaking, polymorphic region recursion favours recursive functions\nthat have a balanced call tree (as opposed to an iterative computation, where the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of the Fibonacci function:\nfun fib n = if n<=1 then 1 else fib(n-2) + fib(n-1)\nval fib15 = fib 15;\nDue to region polymorphism, the two recursive calls of fib use different regions,\nlocal to the body (see Fig. 2). The memory usage appears in Fig. 
4.\nThe next example, called reynolds2 [5], is a depth-first search in a tree, using\na predicate to record the path from the root to the present node:\ndatatype 'a tree =\nLf\n| Br of 'a * 'a tree * 'a tree\nfun mk_tree 0 = Lf\n| mk_tree n = let val t = mk_tree(n-1)\nin Br(n, t, t)\nend\nfun search p Lf = false\n| search p (Br(x, t1, t2)) =\nif p (x) then true\nelse search (fn y => y=x orelse p (y)) t1\norelse\nsearch (fn y => y=x orelse p y) t2\nval reynolds2 = search (fn _ => false) (mk_tree 20)\nDue to the polymorphic recursion, the recursive call of search does not put the\nclosures for (fn y => y=x orelse p (y)) in the same region as p, so the space\nusage will be proportional to the depth of the tree. This leads to good memory\nutilisation (Fig. 4).\nFIG. 4. Memory used in running sample programs on the ML Kit with Regions, Version 29a3:\n(i) maximal space (in bytes) used for variable size regions (one region page is 800 bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) number\nof bytes holding values at the end of the computation (regions on stack + data in variable sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, called reynolds3, which uses a list\nto represent the path.   
It is obtained by replacing the search function of\nreynolds2 by\nfun member(x, []) = false\n| member(x, x' :: rest) =\nx=x' orelse member(x, rest)\nfun search p Lf = false\n| search p (Br(x, t1, t2)) =\nif member(x, p) then true\nelse search (x :: p) t1 orelse\nsearch (x :: p) t2\nval reynolds3 = search [] (mk_tree 20)\nAs we saw in Section 11, region inference does not distinguish between a list and\nits tail, so all cons cells (one for each node in the tree) are put in the same region.\nThis gives poor memory utilisation, the difference from reynolds2 being\nexponential in the depth of the tree (Fig. 4). More generally, in connection with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times of a value v of that type and other values of the same type contained\nin v.\n12.1.2. Tail Recursion\nAnother common pattern of computation is iteration. This is best implemented\nusing a recursive function whose type scheme takes the form $\\forall\\vec{\\alpha}\\,\\vec{\\rho}\\,\\vec{\\epsilon}.(\\mu \\xrightarrow{\\epsilon.\\varphi} \\mu)$ (note\nthat the argument and result types are the same, even after region annotation).\nSuch a function is called a region endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0)) = p\n| sum(acc, n) = sum(n+acc, n-1)\nval sumit = #1(sum(0, 100));\nIn ML, all functions in principle take one argument, in this case a tuple, and\nthat is how it is implemented in the ML Kit. One might think that 100 pairs\nwould pile up in one region; however, an analysis called the storage mode analysis\n[5] discovers that the region can be reset just before each pair is written, so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof the number of iterations, in this example.  
By contrast, the non-tail-recursive\nversion\nfun sum' 0 = 0\n| sum' n = n + sum'(n-1)\nval sum'it = sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program, appel1, is a variant of a program in [2]:\nfun s(0) = nil\n| s(i) = 0 :: s(i-1)\nfun length [] = 0\n| length(x :: xs) = 1 + length xs\nval N = 100\nfun f(n, x) =\nlet val z = length x\nin if n=0 then 0 else f(n-1, s N)\nend\nval appel1 = f(N, nil)\nHere f(n, nil) uses space $\\Theta(N^2)$, although $\\Theta(N)$ should be enough. The problem\nis that at each iteration a list of length N is created, put in a fresh region, and then\npassed to the recursive call, which only uses the list to compute z. The list,\nhowever, stays live till the end of the recursive call: Rules 23 and 27 tell us that the\n$\\lambda$-bound x will be allocated throughout the evaluation of the body of f. The cure\nin this case is not to use the polymorphic recursion:\nfun f(p as (n, x)) =\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n-1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow the storage mode analysis will discover that the region containing the entire\nlist can be reset at each iteration; this is tail call optimisation for recursive\ndatatypes! The above transformation is a rather indirect way of instructing the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser eliminated the conditional, it would not even have the desired effect. It\nwould probably be better to allow programmers to state their intentions directly.\nMemory consumption is in Fig. 7.\n12.1.3. Higher-Order Functions\nIf a function f is lambda-bound, it is not region-polymorphic (Rule 23).  
For\nexample, consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x :: xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op ^) \"\" list\nfun blanks 0 = []\n| blanks n = \"\" :: blanks(n-1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact that foldl is region-polymorphic, the lambda-bound f is not, so\nall applications of the concatenation operator ^ in concat will put their results in\nthe same region, leading to $\\Theta(N^2)$ space usage. To obtain $\\Theta(N)$ space usage, one\nspecializes foldl to ^, uncurries the resulting function, and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n| concat'(acc, (x :: xs)) = concat'(acc^x, xs)\nfun concat(list) = #1(concat'(\"\", list))\nfun blanks 0 = []\n| blanks n = \"\" :: blanks(n-1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA number of benchmarks from the New Jersey Standard ML benchmark suite\nhave been ported to the Kit and compared (space and time usage) against execution\nas stand-alone programs under Standard ML of New Jersey, version 93. The\nlargest benchmark is Simple (1148 lines), a program which originally used arrays\nof floating point numbers extensively. To make it run on the Kit (which does not\nsupport arrays) arrays were translated into lists of references, so the ported\nprogram is probably not indicative of how one would write the program without\narrays to start with. Life (252 lines) uses lists very extensively; Mandelbrot (170\nlines) uses floating points extensively; Knuth-Bendix (752 lines) does extensive\ndynamic allocation of data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5 shows a region profile for the original version of the Knuth\u2013Bendix benchmark,\nproduced using Hallenberg's region profiler [10a]. The region profiler can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram is then changed, to make it more region friendly. Interestingly, transformations\nthat are good for region inference often are good for SML/NJ too (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG. 5. Region profile for Knuth\u2013Bendix before optimisations. One region ($\\rho_{122}$) of unbounded size,\nindicated as r122inf in the picture, is responsible for most of the space leak. Additional profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for all values in that region.\nFIG. 6. Region profile for Knuth\u2013Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG. 7. Comparison between stand-alone programs created with the ML Kit (using the HP PA-RISC\ncode generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr'' means improved for region inference. All times are user time in seconds on an HP 9000 s700,\nmeasured using the unix time command. Space is maximal resident memory in kilobytes, measured\nwith top, and includes code and runtime system. All values are average over three runs.\nKnuth\u2013Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time, and this is good for garbage collection too. The\nregion profile of the improved Knuth\u2013Bendix completion is shown in Fig. 6; see\nFig. 7 for a comparison with SML of New Jersey, version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms, the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive recycling of memory possible). The Kit performs optimisations which\nseparate regions. These include replacing let x = $e_1$ in $e_2$ end by $e_2[e_1/x]$ in\ncases where $e_1$ is a syntactic value and either x occurs at most once in $e_2$ or the\nvalue denoted by $e_1$ is not larger than some given constant. Another optimisation,\nwhich is implemented, is specialisation of curried functions, as in the string2\nexample above; however, the Kit does not attempt to turn functions into region\nendomorphisms (which was the last thing we did in string2). As a matter\nof principle, the Kit avoids optimisations which can lead to increased memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n$\\forall\\vec{\\rho}\\,\\vec{\\alpha}\\,\\vec{\\epsilon}.\\ \\mu_1 \\xrightarrow{\\epsilon.\\varphi} \\mu_2$\nand $\\varphi$ contains an atomic effect of the form $\\mathbf{put}(\\rho)$, where $\\rho$ is not amongst the\nbound region variables $\\vec{\\rho}$, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas a side-effect at the implementation level. This can easily happen even when\nthere are no side-effects in the source program.\nIn such cases, the implementation simply issues a short warning. This turns out\nto be very useful in practice.\nAnother usage of the inferred information is the ability to detect dead code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then whatever\nvalue that was put into $\\rho$ was never used. For example, this can detect that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs has been shown with the previous examples, it is not the case that every\nML program automatically runs well on a stack of regions. Often, one has to\nprogram in a region-friendly style, aided by profiling tools to find space leaks.\nThus, programming with regions is different from usual ML programming, where\none relies on a garbage collector for memory management. On the other hand,\nthe region discipline offers what we feel is an attractive combination of the\nconvenience of an expressive programming language and the ability to reason\nabout the time and space performance of programs.  
The relationship between\nthe abstract model of the regions presented in this paper and the concrete\nimplementation is close enough that one can use the abstract model\u2014combined\nwith the profiling tools mentioned earlier\u2014to tune programs, often resulting in very\nspace efficient programs that are executed as written, with no added costs of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE-ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwith V. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn (stdArg, stdClos, stdRes). In this example, the compiler discovers\nthat all regions can be represented on the stack; in other cases, letregion and\nend translate into calls of runtime system procedures that resemble lightweight\nmalloc and free operations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=⊤\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=⊤\nThe fifth and eighth CHCs represent the release of mb/ma. The sixth and ninth CHCs\nrepresent the determination of the return value res.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations. We  use {|···|} (instead  of {···})  for  the  intensional  description  of\na  multiset. A⊕B (or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}≠{|0,1|}).\nReadout and Safe Readout. We introduce a few judgments to formally de-\nscribe how to read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the address a of\ntype T can be read out from the heap H as the value v, yielding the memory\nfootprint M)  is  defined  as  follows.\n21\nHere,  a memory  footprint M is  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  define f\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  if f’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the label L of f in the program Π\nCorrectness  Theorem. Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation). For any program\nΠ and simple function f in Π, f\nCOS\nΠ\nis equivalent to M\nleast\n(|Π|)\n(f\nentry\n).\nProof. The details are presented in Appendix C. We outline the proof below.\nFirst, we introduce abstract operational semantics (Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value with abstract  variables, which is strongly related to prophecy  variables\n(see §5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduce SLDC  resolution (Appendix C.3) for CHC systems and\nfind a bisimulation between abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that the AOS-based  model, defined analogously\nto  the  COS-based  model,  is equivalent to  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find a bisimulation between concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nis equivalent to the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation. □\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown the soundness\nof the type system in terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs are left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike §1, preferring equalities to pattern matching.\nExample 3. Consider the following program, a variant of just_rec in §1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() { drop ma; mb } else { drop mb; ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 { drop ma; return true; }\nlet mut b = rand(); let old_b = b; intro 'b; let mb = &'b mut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb)); now 'b;\nr2 && old_b >= b\n}\nUnlike just_rec, the function linger_dec can modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  to linger_dec can\nintroduce a new lifetime 'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify that linger_dec never returns false. If we use,\nlike JustRec\n+\nin §1.1, a predicate taking the memory states h, h\n′\nand the stack\npointer sp, we have to discover the quantified invariant: ∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n2526\nImportantly, at the very time we take a mutable reference 〈a,a\n◦\n〉 from a ref-cell,\nthe data at the array should be updated to a\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not work well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  toolRustHorn(available  athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, one interesting program of which is explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First, we translated each benchmark program\nwith  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) even for simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.9.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.9.json
new file mode 100644
index 0000000..9d909b4
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.9.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.0.9"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust with unsafe code, the popular Progress and Preservation method is not useful.  unsafe Rust is\nnot well-typed with respect to the safe Rust type system rules, and Rust with relaxed typing rules for unsafe code is\nnot type-safe!  That is why RustBelt follows the semantic approach using logical relations to prove the safety\nof Rust programs with unsafe code.  RustBelt introduces λ_Rust, a formal language close to Rust’s Mid-level\nIntermediate  Representation (MIR).  Next,  it  provides  a  formal  interpretation  for λ_Rust’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nfor λ_Rust’s type system.  Then they prove the safety of λ_Rust using this semantic model following three steps,\nwhich have been mentioned in the RustBelt [8] paper as follows.\n1.  “Verify that the typing rules of λ_Rust are sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called the fundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is called adequacy.”\n3.  “For any library that employs unsafe code internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that the unsafe code has\nindeed  been  safely encapsulated by  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith the fundamental and adequacy theorems together, we have that syntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.  self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime 'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\n⟦&^{κ}_{shr} τ⟧.size := 1    (1)\n⟦&^{κ}_{shr} τ⟧.own(t, v̄) := ∃ℓ. v̄ = [ℓ] ∗ ⟦τ⟧.shr(⟦κ⟧, t, ℓ)    (2)\n⟦cell⟧.shr(κ, t, ℓ) := &^{κ/t}_{na} (∃v̄. ℓ ↦ v̄ ∗ ⟦int⟧.own(t, v̄))    (3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference to Cell type\n1\n2.  The parameter self carries an aligned non-null address.\n3.  There are enough bytes to store a Cell value allocated at the address stored in self.\n4.  There is a valid Cell value stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. However,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each type τ is interpreted by a tuple ⟦τ⟧ = (size, own, shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting the &'a Cell type.\nDefinition 1 of the size value for shared references to τ under lifetime κ shows that all shared references\nare of size 1 memory unit.  Definition 2 of the own predicate for shared references to τ under lifetime κ has an\ninteresting meaning.  Its body uses the shr component of the interpretation of type τ, i.e. ⟦τ⟧.shr(⟦κ⟧, t, ℓ).\nThis represents the fact that having a shared reference to a type τ has different meanings depending on τ.\nThat is why RustBelt defines a shr component for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A type τ is Sync, if and only if, the type of shared references to τ, i.e.  &^{κ}_{shr} τ, is Send.  The fact\n1\nSome details have been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the component shr for shared references.  It is not of interest in this example.\n13\n\n(&^{κ/t}_{na} P) ∗ [κ]_q ∗ [Na:t]  ≡−∗  ▷P ∗ (▷P  ≡−∗  [κ]_q ∗ [Na:t])    (4)\nListing 8:  LftL-na-acc rule from RustBelt’s lifetime logic\nthat Cell is not Sync has been reflected in RustBelt’s interpretation as follows.  The &^{κ/t}_{na} which has been used\nin the shr component of ⟦cell⟧ depends on the thread identifier t.  In short, Cell’s sharing predicate depends\non  the  thread  identifier.   Since ⟦&^{κ}_{shr} τ⟧.own,  shown  in  Definition  2,  consists  of ⟦τ⟧.shr, ⟦&^{κ}_{shr} cell⟧.own\ndepends on t as well, reflecting that shared references to Cell are not Send.\nThe interesting point in proving RustBelt’s step three about Cell::set is that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthese are the resources held by a non-atomic persistent borrow.  Listing 8 shows rule LftL-na-acc of the lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resource P when  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &^{κ/t}_{na} P itself, the rule requires [κ]_q and [Na:t].  Intuitively, in the Cell::set example, if we\nprovide a witness that lifetime 'a is alive and we are in the same thread as the Cell itself, we can get our\nfull access.  But there is more to [κ]_q and [Na:t] than that.  Let us explain them in order.\n[κ]_q is the lifetime logic’s lifetime token, representing that lifetime κ is alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute, and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  the  Rust  compiler  actually  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, the semantics of Rust’s types,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for λ_Rust by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stack G is\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ_1 × τ_2) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular, γ describes the total order of the regions in S. This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness). If:\n1. ⊢_heap (∅, [ρ_H ↦ R]) : ∆; Γ; γ,\n2. ⊢_ret s,\n3. ∆; Γ; γ; {ρ_H}; int ⊢_stmt s, and\n4. s contains no pop statements\nthen either (G, S, s) runs forever or there exists a G′, R′ and\ni such that (G, [ρ_H ↦ R], s) →* (G′, [ρ_H ↦ R′], return i).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ_H ↦ R])\nthat is well-formed,  and if statement s “doesn’t fall off,”\nand s is well-formed with respect to the type of the initial\nheap and returns only integers, and s does not contain pop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S, (Λα:κ▷γ.f)⟨τ⟩) → (G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage-collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C. Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has no a priori restrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC: grobner and tile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables \\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set [(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read: s  without r)  to mean the store sa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because \\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely \\\n3\n,  which  is  the  region  where x is\nlocated, and \\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from \\\n3\nand \\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that \\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by \\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in \\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthere exist e$\n1\n,+\n2\nand.\n0\nsuch that TE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)] from which the desired result\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001f(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied,TEmust  havefin  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proofrule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the syntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  for all derivations $TE \\vdash e \\Rightarrow e' : \\mu, \\varphi$,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitives ref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon. (\\alpha, \\rho_1) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2)\\}} ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2)$\n! : $\\forall \\alpha \\rho_1 \\rho_2 \\epsilon. ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{get}(\\rho_2)\\}} (\\alpha, \\rho_1)$\n:= : $\\forall \\alpha \\rho_1 \\rho_2 \\rho_3 \\rho_4 \\epsilon. (((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) * (\\alpha, \\rho_1), \\rho_3) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2), \\mathbf{put}(\\rho_4)\\}} (\\mathtt{unit}, \\rho_4)$.\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of $\\rho_1$ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then what-\never  value  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u2014combined\nwith the profiling tools mentioned earlier\u2014to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE-ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwith V. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases, letregion and\nend translate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmalloc and free operations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n2526\nImportantly, at the very time we take a mutable reference 〈a,a\n◦\n〉 from a ref-cell,\nthe data at the array should be updated into a\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory  leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.1.1.json b/jendeley-backend/generated_DBs/jendeley_db_2.1.1.json
new file mode 100644
index 0000000..2c27c2c
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.1.1.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.1.1"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions needed to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  to  diverge  from  what  exactly  the  Rust  compiler  is.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  forλ\nRust\nby  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics ofλ\nRust\nis given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C. Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has no a priori restrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC: grobner and tile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  addressa,  we  writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom  region  names  to  regions.  We  usesto  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a)  to  means(r)(o).  Similarly,  we  writes+[(r,o)[sv]as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  theplanar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read:s  without r)  to mean the storesa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  forms,VE,R|&e$\u0014v$,s$,  read:In  store  s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps  modified)store s$.\nRule 10  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction  takes  two  arguments,  namely\\\n3\n,  which  is  the  region  wherexis\nlocated, and\\\n4\n,  which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system,  the recursive\ncalls  offibuse  regionsdifferentfrom\\\n3\nand\\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nTheletrecstores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function  closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.The Fibonacci  function annotated with regions.  The result  will be a single integer  in\\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  thebound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$isaninstance of  _,  written\n_\u001e{$,  if  there  exists  a  substitutionSsuch  that  Supp(S) \u001fbv(_)  andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nAtype  environmentis  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthere exist e$\n1\n,+\n2\nand.\n0\nsuch that TE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)] from which the desired result\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not defined C yet.) Next,\nconsider  the  point  where  the  actual  argument  4  to f has  been  stored,  the  closure\nfor f has  been  fetched  and  we  are  just  about  to  evaluate  the  body  of f.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  either\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  the  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of ρ₁ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region ρ₁₂₂ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.  Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition\\does not occur free in+\n2\n, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule forletregion(Rule 27). Ifput(\\)#.andget(\\)\u0012.then what-\never  value  that  was  put  into\\was  never  used.  For  example,  this  can  detect  that\nthe functionsfandgbelow are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn xO3)(fn( )Og5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown the soundness\nof the type system in terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs are left for future work.\n3.4    Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike §1, preferring equalities to pattern matching.\nExample 3. Consider the following program, a variant of just_rec in §1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() { drop ma; mb } else { drop mb; ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 { drop ma; return true; }\nlet mut b = rand(); let old_b = b; intro 'b; let mb = &'b mut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb)); now 'b;\nr2 && old_b >= b\n}\nUnlike just_rec, the function linger_dec can modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call to linger_dec can\nintroduce a new lifetime 'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify that linger_dec never returns false. If we use,\nlike JustRec+ in §1.1, a predicate taking the memory states h, h′ and the stack\npointer sp, we have to discover the quantified invariant: ∀i ≤ sp. h[i] ≥ h′[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a, a◦〉, 〈b, b◦〉, r) ⇐= b◦ = b ∧ r = 〈a, a◦〉\nChoose(〈a, a◦〉, 〈b, b◦〉, r) ⇐= a◦ = a ∧ r = 〈b, b◦〉\nLingerDec(〈a, a◦〉, r) ⇐= a′ = a − 1 ∧ a◦ = a′ ∧ r = true\nLingerDec(〈a, a◦〉, r) ⇐= a′ = a − 1 ∧ oldb = b ∧ Choose(〈a′, a◦〉, 〈b, b◦〉, mc)\n∧ LingerDec(mc, r′) ∧ r = (r′ && oldb >= b◦)\nr = true ⇐= LingerDec(〈a, a◦〉, r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a, a◦〉, 〈b, b◦〉, r) :⇐⇒ (b◦ = b ∧ r = 〈a, a◦〉) ∨ (a◦ = a ∧ r = 〈b, b◦〉)\nLingerDec(〈a, a◦〉, r) :⇐⇒ r = true ∧ a ≥ a◦.\nExample 4. Combined with recursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR, List can be expressed as μX. int × own X + unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() { drop mxs2; mx }\nelse { drop mx; take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs); intro 'a; let my = take_some<'a>(&'a mut xs);\n*my += 1; drop my; now 'a; let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type. take_some takes a mutable reference to a list and returns\na mutable reference to some element of the list. sum calculates the sum of the\nelements of a list. inc_some increments some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by 1.\nSuppose we wish to verify that inc_some never returns false. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs′], xs◦〉, r) ⇐= xs◦ = [x◦|xs′◦] ∧ xs′◦ = xs′ ∧ r = 〈x, x◦〉\nTakeSome(〈[x|xs′], xs◦〉, r) ⇐= xs◦ = [x◦|xs′◦] ∧ x◦ = x ∧ TakeSome(〈xs′, xs′◦〉, r)\nTakeSome(〈[], xs◦〉, r) ⇐= TakeSome(〈[], xs◦〉, r)\nSum(〈[x|xs′]〉, r) ⇐= Sum(〈xs′〉, r′) ∧ r = x + r′\nSum(〈[]〉, r) ⇐= r = 0\nIncSome(xs, r) ⇐= Sum(〈xs〉, n) ∧ TakeSome(〈xs, xs◦〉, 〈y, y◦〉) ∧ y◦ = y + 1\n∧ Sum(〈xs◦〉, m) ∧ r = (m == n+1).\nA crucial technique used here is subdivision of a mutable reference, which is\nachieved with the constraint xs◦ = [x◦|xs′◦].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum (satisfying sum([x|xs′]) := x + sum(xs′), sum([]) := 0):\nTakeSome(〈xs, xs◦〉, 〈y, y◦〉) :⇐⇒ y◦ − y = sum(xs◦) − sum(xs)\nSum(〈xs〉, r) :⇐⇒ r = sum(xs)\nIncSome(xs, r) :⇐⇒ r = true.\nAlthough the model relies on the function sum, the validity of the model can be\nchecked without induction on sum (i.e. we can check the validity of each CHC\njust by properly unfolding the definition of sum a few times).\nThe example can be fully automatically and promptly verified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see §4.\n23\n[x|xs] is the cons made of the head x and the tail xs. [] is the nil. In our formal\nlogic, they are expressed as inj₀ (x, 〈xs〉) and inj₁ ().\n\n20 Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques. Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of a stateless\nfunctional programming language, which allows us to use various verification\ntechniques not limited to CHCs. Access to future information can be modeled\nusing non-determinism. To express the value a◦ coming at the end of mutable\nborrow in CHCs, we just randomly guess the value with non-determinism. 
At\nthe time we actually release a mutable reference, we just check a' = a and cut\noff execution branches that do not pass the check.\nFor example, take_max/inc_max in §1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses a random guess and ‘assume (a' = a)’\nexpresses a check. The original problem “Does inc_max never return false?”\nis reduced to the problem “Does main never fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs. We have to care about the following points in\nmodeling closures: (i) A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased; (ii) A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii) A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec in Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample, Vec [58] can be represented simply as a functional array; a muta-\nble/immutable slice &mut[T]/&[T] can be represented as an array of muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25,26\nImportantly, at the very time we take a mutable reference 〈a, a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory leaks [52] caused for\nexample by combination of RefCell and Rc [57], because they obfuscate the\nownership release of mutable references. We think that use of Rc etc. should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1    Implementation of RustHorn\nWe implemented a prototype verification tool RustHorn (available at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe benchmarks in the groups simple and bmc were taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare based on the examples that have appeared in §1 and §3.4. The group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groups lists and trees feature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of them explained in §3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nwith RustHorn and SeaHorn (version 0.1.0-rc3) [23] into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicates pull and push. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.1.2.json b/jendeley-backend/generated_DBs/jendeley_db_2.1.2.json
new file mode 100644
index 0000000..fa3e9ed
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.1.2.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.1.2"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of the language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust and COR respectively, are inspired by MIR,\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  exactly  the  Rust  compiler  is.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for λ_Rust by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control withlongjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables \\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read:s  without r)  to mean the store sa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because \\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely \\\n3\n,  which  is  the  region  where x is\nlocated, and \\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from \\\n3\nand \\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that \\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by \\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in \\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  the bound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors: :\u0011,\\\u0011, and =\u0011, respec-\ntively. Two type schemes are equivalent if they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type {$ is an instance of  _,  written\n_\u001e{$,  if  there  exists  a  substitution S such  that  Supp(S) \u001fbv(_)  and S({)={$.\nWhen we want to make S explicit, we say that {$ is an instance of _ via S, written\n_\u001e{$ via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write { as  a  shorthand  for  the  simple  type  scheme \\().{,  not  to\nbe confused with the  compound  type scheme \\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nA type  environment is  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  use TE to range over type environments.\nThe semantic objects are summarised in Fig. 3. The notion of free variables extends\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free in TE if it occurs free in TE(x), for some x.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied,TEmust  havefin  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof  rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  either\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  the  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of\\\n1\nin the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region\\\n122\nin Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called  auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.  Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then what-\never  value  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we just check a' = a and cut\noff execution branches that do not pass the check.\nFor example, take_max/inc_max in §1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses a random guess and ‘assume (a' = a)’\nexpresses a check. The original problem “Does inc_max never return false?”\nis reduced to the problem “Does main never fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs. We have to care about the following points in\nmodeling closures: (i) A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased; (ii) A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii) A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec at Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter  the  libraries  are  verified,  we  can  turn  to  our  method.  For  an  easy\nexample, Vec [58]  can  be  represented  simply  as  a  functional  array;  a  muta-\nble/immutable  slice &mut[T]/&[T] can  be  represented  as  an  array  of  muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n2526\nImportantly, at the very time we take a mutable reference 〈a, a_◦〉 from a ref-cell,\nthe data at the array should be updated into a_◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory leaks [52] caused for\nexample  by  combination  of RefCell and Rc [57],  because  they  obfuscate  the\nownership  release  of  mutable  references.  We  think  that  use  of Rc etc.  should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments  conducted  with  small  benchmarks  to  confirm  the  effectiveness  of\nour approach.\n4.1    Implementation of RustHorn\nWe  implemented  a  prototype  verification  tool RustHorn (available  at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports  basic  features  of  Rust  supported  in  COR,  including  recursions  and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn  order  to  use  the  MIR,  RustHorn’s  implementation  depends  on  the  unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe  benchmarks  in  the  groups simple and bmc were  taken  from  SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test),  with  the  Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare  based  on  the  examples  that  have  appeared  in §1  and §3.4.  The  group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable  references  to  mutable  references.  The  groups lists and trees feature\ndestructive  updates  on  recursive  data  structures  (lists  and  trees)  via  mutable\nreferences, with one interesting program among them explained in §3.4.\nWe  conducted  experiments  on  a  commodity  laptop  (2.6GHz  Intel  Core  i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby  RustHorn  and  SeaHorn  (version  0.1.0-rc3)  [23]  into  CHCs  in  the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother  approach  is  taken  by  JayHorn  [37,36],  which  translates  Java  pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing  special  predicates pull and push.  Although  this  allows  faster  reasoning\nabout  the  heap  than  the  array-based  approach,  it  can  suffer  from  more  false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt  [32]  aims  to  formally  prove  high-level  safety  properties  for  Rust\nlibraries  with  unsafe  internal  implementation,  using  manual  reasoning  on  the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leverages borrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in §3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in §3.5.\nVerification  using  Ownership. Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunder borrow-based ownership like Rust was little studied before our work.\nProphecy  Variables. Our idea of taking a future value to represent a mutable\nreference is linked to the notion of prophecy variables [1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in §3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26 Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments. This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci. 82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  Mérida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6_7\n3.  Astrauskas,  V.,  Müller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018).\nhttps://doi.org/10.3929/ethz-b-000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535.\nhttps://doi.org/10.1007/978-3-030-01090-4_32\n5.  Barnett, M., Fähndrich, M., Leino, K.R.M., Müller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM 54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9_2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Zürich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bodík,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4_14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst. 29(3), 17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un système de types pragmatique pour la vérification déductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),\nhttps://tel.archives-ouvertes.fr/tel-01533090\n\n28 Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4_20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5_8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-4_15\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZürich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., Rümmer, P.: The Eldarica Horn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:  crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016), http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),\nhttp://ethos.bl.uk/OrderDetails.do?uin=uk.bl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020), https://github.com/Z3Prover/z3\nOpen  Access  This  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/4.0/),\nwhich permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32 Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference. The rule for dereference (let y = ∗x) may seem com-\nplicated at a glance. However, that is only because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe introduce abstract operational semantics for COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed as abstract variables x (marked bold and light blue),\nwhich is strongly related to prophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce a pre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as an abstract configuration C, which is defined as follows. Here, an\nabstract stack frame F maps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion to C, which does not directly affect the execution. A is a global lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it is not guarded by any mut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’, α can get earlier and T can be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’, α does not change and T can be weakened.\nMutable (Re)borrow. When we perform let my = mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsome x\n◦\nand T.\nRelease of a Mutable Reference. When we release a mutable reference mx, whose\npre-value is of form 〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since ˆv moves to another variable, the type of each abstract\nvariable in ˆv may change into an equivalent type.\nOwnership Weakening. Similar to a release of a mutable reference.\nSwap. Swap (swap(∗x,∗y)) actually does not alter the abstract variable sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 43\nCopying. When data of type T is copied, T: copy holds, which ensures that\neach mutable reference mut\nα\nU in T is guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision of a Mutable Reference. A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let (∗mx\n0\n,∗mx\n1\n) = ∗mx’, variant destruction\n‘match ∗mx {inj\n0\n∗my → goto L\n0\n,···}’, and dereference ‘let mx = ∗mpx’. When a\nmutable reference mx with a pre-value 〈ˆv,x〉 is subdivided, the two items of form\ngive\nα\n(x::T) and take\n†β\n(x::T\n′\n) are accordingly ‘subdivided’ in the abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.1.3.json b/jendeley-backend/generated_DBs/jendeley_db_2.1.3.json
new file mode 100644
index 0000000..24ac1cd
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.1.3.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.1.3"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that calls create_deque\nseveral times?  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We would also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalled a function contract.  Generally, we would like a function’s contract to tell us what the weakest pre-condition is,\ni.e.  the set of requirements for this function such that, if it holds, no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states such that, if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongest postcondition the function ensures is.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions’  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contract  requires  clause  and  check  the  validity  of  its  contract’s  ensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use it in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we present\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety taking unsafe Rust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound with respect to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions that need to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose more clearly, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi, λ_Rust\nand COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute the MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  from  diverging  from  what  exactly  the  Rust  compiler  does.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us from implementing the execution and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s type semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  for  λ_Rust  by  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics of λ_Rust is given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  forcfracandmini_httpd,we\nregionizedthe code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?.   We are currently experimenting\nwith  interpretingchar *as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s)    factor\ncacm0.12±0.000.15±0.00    1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01    2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15    0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00    2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02    1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00    1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02    1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00    1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00    1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00    1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00    1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00    1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01    2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00    1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe.Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’sτ?pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that codemustfree a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C.Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read: s  without r)  to mean the store sa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because\\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely\\\n3\n,  which  is  the  region  where x is\nlocated, and\\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from\\\n3\nand\\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2.  The Fibonacci  function annotated with regions.  The result  will be a single integer  in\\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  the bound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes are equivalent if they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type{$ is an instance of  _,  written\n_\u001e{$,  if  there  exists  a  substitution S such  that  Supp(S) \u001fbv(_)  and S({)={$.\nWhen we want to make S explicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write{as  a  shorthand  for  the  simple  type  scheme\\().{,  not  to\nbe confused with the  compound  type scheme\\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nA type  environment is  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  use TE to range over type environments.\nThe semantic objects are summarised in Fig. 3. The notion of free variables extends\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free in TE if it occurs free in TE(x), for some x.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider  the  point  where  the  actual  argument  4  tofhas  been  stored,  the  closure\nforfhas  been  fetched  and  we  are  just  about  to  evaluate  the  body  off.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  rules Rule 36 and 37 we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules forFrather than the rules that defineF.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequence  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  neither\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by induction on the depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  the  syntax-directed\nrules  (i.e.,  rules 20-26) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  that  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)-(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\ne\n1\n, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is calledfixed-point resolutionand takese\n1\nas input. For each\nrecursive  function  ine\n1\n,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types{\n1\nand{\n2\nthat have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in{\n1\nand\n{\n2\nsatisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  forallderivationsTE|&eOe$:+,.,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitivesref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\nref:\\:\\\n1\n\\\n2\n=.(:,\\\n1\n)wwww\u0014\n=.[put(\\\n2\n)]\n((:,\\\n1\n)ref,\\\n2\n)\n!:\\:\\\n1\n\\\n2\n=.((:,\\\n1\n)ref,\\\n2\n)wwww\u0014\n=.[get(\\\n2\n)]\n(:,\\\n1\n)\n:= :\\:\\\n1\n\\\n2\n\\\n3\n\\\n4\n=.(((:,\\\n1\n)ref,\\\n2\n)V(:,\\\n1\n),\\\n3\n)wwwwww\u0014\n=.[put(\\\n2\n),put(\\\n4\n)]\n(unit,\\\n4\n).\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nputeffect on the region where the reference resides. The assignment does not make\na  copy  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of\\\n1\nin the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region\\\n122\nin Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-calledauxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfunis translated  intoletrecandvalintolet.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1.Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n&2)+fib(n&1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  offibuse  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth\u0015Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML\u0012NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5.Region profile for Knuth\u0015Bendix before optimisations. One region (\\\n122\n) of unbounded size,\nindicated  asr122infin  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6.Region profile  for Knuth\u0015Bendix after optimizations.\n169REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7.Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unixtimecommand.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwithtop,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth\u0015Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth\u0015Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacingletx=e\n1\nine\n2\nendbye\n2\n[e\n1\n\u0012x]in\ncases  wheree\n1\nis  a  syntactic  value  and  eitherxoccurs  at  most  once  ine\n2\nor  the\nvalue denoted  bye\n1\nis not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  thestring2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  instring2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n\\\\\u0011:\u0011=\u0011.+\n1\nw\u0014\n=..\n+\n2\nand.contains  an  atomic  effect  of  the  formput(\\),  where\\is  not  amongst  the\nbound region variables\\\u0011, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition\\does not occur free in+\n2\n, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule forletregion(Rule 27). Ifput(\\)#.andget(\\)\u0012.then what-\never  value  that  was  put  into\\was  never  used.  For  example,  this  can  detect  that\nthe functionsfandgbelow are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn xO3)(fn( )Og5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X]    #int= #P T:= 1    #unit= 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR  programs.  We  first  define  the  logic  for  CHCs  (§3.1).  We  then  formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo  begin  with,  we  introduce  a  first-order  multi-sorted  logic  for  describing  the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n)    (elementary formula)  ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::=  same as CORop::=  same as COR\nbool:=unit+unit  true:=inj\n1\n()false:=inj\n0\n()\nX::=  (sort variable)x,y::=  (variable)f::=  (predicate variable)\nWe  introduceboxσandmutσ,  which  correspond  toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)  :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r)  :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive  data  structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive  data  type.take_sometakes  a  mutable  reference  to  a  list  and  returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements  of  a  list.inc_someincrements  some  element  of  a  list  via  a  mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly ver-\nified a similar representation of choose/linger_dec at Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seem to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample, Vec [58] can be represented simply as a functional array; a muta-\nble/immutable slice &mut[T]/&[T] can be represented as an array of muta-\nble/immutable references. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address.\n25 26\nImportantly, at the very time we take a mutable reference 〈a,a◦〉 from a ref-cell,\nthe data at the array should be updated into a◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory leaks [52] caused for\nexample by combination of RefCell and Rc [57], because they obfuscate the\nownership release of mutable references. We think that use of Rc etc. should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1    Implementation of RustHorn\nWe implemented a prototype verification tool RustHorn (available at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe benchmarks in the groups simple and bmc were taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare based on the examples that have appeared in §1 and §3.4. The group\nswap-dec consists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groups lists and trees feature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program among them explained in §3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicates pull and push. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) even for simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example, inc-some/2 takes two mutable references in a list and increments on\nthem; inc-all-t destructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on Spacer JustRec+, the stack-pointer-based accurate representation\nof just_rec presented in §1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculus λ_Rust.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references by  simple  static  tracking  of  addresses  based  on  lenses  [20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g. take_max in §1.2) [66], which our method can easily handle. Our approach\ncovers all usages of pointers of the safe core of Rust as discussed in §3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  on  fractional  ownership,  however,\ntheir platforms have to use concrete indexing on the memory for programs like\ntake_max/inc_max. 
In contrast, our idea leverages borrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in §3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in §3.5.\nVerification  using  Ownership. Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunder borrow-based ownership like Rust was little studied before our work.\nProphecy  Variables. Our idea of taking a future value to represent a mutable\nreference is linked to the notion of prophecy variables [1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in §3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments. This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci. 82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  Mérida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6_7\n3.  Astrauskas,  V.,  Müller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-4_32\n5.  Barnett, M., Fähndrich, M., Leino, K.R.M., Müller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM 54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9_2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Zürich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bodík,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4_14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.  29(3),  17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un système de types pragmatique pour la vérification déductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016), https://tel.archives-ouvertes.fr/tel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here, T ∼_A U means T ≤_A U ∧ U ≤_A T (the type\nequivalence).\nX(x) = {| give_α(x::T), take^{†β}(x::T′) |}    T ∼_A T′    α ≤_A β\n----------------\nsafe_A(x, X)\nX(x) = ∅\n----------------\nsafe_A(x, X)\nX(x): the multiset of the items of form ‘give_γ(x::U)’/‘take^γ(x::U)’ in X\n‘safe_A(X)’ means that safe_A(x, X) holds for any x.\nFinally, ‘safe_Π(C)’ is defined as follows.\nsummary_Π(C | X)    lifetimeSafe_Π(C)    C = ··· |_A    safe_A(X)\n----------------\nsafe_Π(C)\nProperty 1 (Safety on an Abstract Configuration Ensures Progression). For any\nΠ and C such that safe_Π(C) holds and final_Π(C) does not hold, there exists C′\nsatisfying C →_Π C′.\nProof. Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to each active variable, abstract\nvariables do not appear except in the form 〈v̂, x〉. □\nLemma 1 (Safety on the Abstract Configuration is Preserved). For any\nΠ and C, C′ such that safe_Π(C) and C →_Π C′ hold, safe_Π(C′) is satisfied.\nProof. Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening. Type weakening (x as T) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it is not guarded by any mut_α.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give_α(x::T)’, α can get earlier and T can be weakened; and\n(ii) for an item of form ‘take^{†α}(x::T)’, α does not change and T can be weakened.\nMutable (Re)borrow. When we perform let my = mutbor_α px, the abstract\nvariable summary just gets two new items ‘give_α(x_◦::T)’ and ‘take^{†α}(x_◦::T)’, for\nsome x_◦ and T.\nRelease of a Mutable Reference. When we release a mutable reference mx, whose\npre-value is of form 〈v̂, x_◦〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give_α(x_◦::T)’ and ‘take^{†β}(x_◦::T′)’ are\nremoved; and (ii) since v̂ moves to another variable, the type of each abstract\nvariable in v̂ may change into an equivalent type.\nOwnership Weakening. Similar to a release of a mutable reference.\nSwap. Swap (swap(∗x, ∗y)) actually does not alter the abstract variable\nsummary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 43\nCopying. When data of type T is copied, T: copy holds, which ensures that\neach mutable reference mut_α U in T is guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision of a Mutable Reference. A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let (∗mx_0, ∗mx_1) = ∗mx’, variant destruction\n‘match ∗mx {inj_0 ∗my → goto L_0, ···}’, and dereference ‘let mx = ∗mpx’. When a\nmutable reference mx with a pre-value 〈v̂, x〉 is subdivided, the two items of form\ngive_α(x::T) and take^{†β}(x::T′) are accordingly ‘subdivided’ in the abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file
diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.2.0.json b/jendeley-backend/generated_DBs/jendeley_db_2.2.0.json
new file mode 100644
index 0000000..1205f69
--- /dev/null
+++ b/jendeley-backend/generated_DBs/jendeley_db_2.2.0.json
@@ -0,0 +1,1567 @@
+{
+  "jendeley_meta": {
+    "idType": "meta",
+    "version": "2.2.0"
+  },
+  "arxiv_2212.12976": {
+    "path": [
+      "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1  [cs.LO]  26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety.  For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks.  However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer.  Even most expert programmers make mistakes and\na  memory  safety  bug  in  anunsafeblock  renders  all  the  type  system  guarantees  void.   To  address  this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm.  This text outlines our approach and the progress that has been made so far.\nContents\n1  Introduction2\n2  Unsafe Code and Safe Abstractions3\n2.1    Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .3\n2.2    Unsound Unsafe   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .5\n3  Modular Symbolic Execution (MSE)6\n3.1    Concrete Execution .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .6\n3.2    Symbolic Execution    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .9\n3.3    Modular Symbolic Execution   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  
.9\n3.4    Modular Symbolic Execution and Verifying Safe Abstractions  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .11\n4  RustBelt12\n4.1    RustBelt’s semantic model and MSE  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .12\n5  Implementation15\n5.1    Executing MIR  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.2    Executing MIR in VeriFast   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n5.3    Added value with respect to RustBelt   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .16\n6  Future Plans17\n6.1    Rigorous Soundness   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n6.2    Panic Safety and Stack Unwinding   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .18\n7  Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1  Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well.  To achieve these all together has\nturned out not to be that easy, however.  Other common programming languages usually trade these goals, one\nto another.  Fortunately, Rust’s safety claims have been proven to be legitimate.  The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust.  At the same time, Rust has proven it is not just a research language.  It has found\nits  way  to  the  wild  and  in  fact,  is  getting  ever  more  popular.   
It  shows  the  language  is  simple  enough  for\ndevelopers and industry to use.  So, it is completely fair to say Rust shows great promises.  The main reason\nbehind this success is the language type system.  Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing.  In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety.  The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems.  But, does it?  In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing.  To provide a complete\nset of expected functionalities from a modern programming language and performance improvements,  Rust\nintroducesunsafecode  blocks.   The  type  checker  checks  these  blocks  with  some  relaxations  to  allow  the\nimplementation of such functionalities.  The cost of these relaxations is that programmers themselves should\nmake  sure  theunsafeblocks  do  not  cause  the  program  to  exhibitundefined  behaviour(UB).  Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse.  It is effective but not enough.  There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary.  In this text,unsafecode refers to code\nenclosed in anunsafeblock.  It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined.  
FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular  Symbolic  Execution(MSE) algorithm.  VeriFast is\na  research  tool  for  verifying  the  safety  and  functional  properties  of  C  and  Java  programs  using  MSE.  We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker.  The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races.  We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1.  we  translate  Rust’s  rich  type  system’s  meaning  into  Separation  Logic  formulae.   For  the  meaning  of\nRust’s types we are relying on the semantics provided by RustBelt.  Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2.  we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode.   The  user  can  also  guide  the  MSE  algorithm  with  lemmas  and  ghost  commands  to  verify  more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs.  We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae.  
It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn  the  rest  of  this  text,  in  Section  2  we  take  a  tour  ofunsafeRust  code,  safe  abstractions,  and  their\npotential unsoundness.  Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks.  In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions.  We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm.  Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt.  In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future.  Eventually, we\nconclude in Section 7.\n2  Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership.  Listing 1 shows these different types of ownership\nof a vector.  The most basic type of ownership isexclusive ownership.  Owner variables, e.g.v, have exclusive\nownership.  An active owner variable rules out aliasing entirely.  The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope.  We can transfer the ownership to other\nfunctions/threads.  But it is still not that expressive.  To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases.  To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily.  A mutable reference grantstemporary  exclusive  ownership.  In our\nexample,mrvgives  us  write  access.   We  mutate  the  vector  inside  functionpush_fourthrough  the  passed\nmutable reference,mrv.  
Oncemrvis out of scope, the ownervgets its exclusive ownership back again.  Owners\nand mutable references, representing exclusive ownership, rule out aliasing.  However, aliasing is needed to give\naccess to multiple threads to the same memory location.  To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming.  Shared references are the Rust’s answer\nfor aliasing.  Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time.  To preserve memory safety shared references rule out mutation.\nAll  of  the  references  in  Rust  have  alifetimein  their  type.'lin  the  type&'l mut i32is  a  lifetime.\nLifetimes represent a continuous range of program execution steps.  Type system’s guarantees about references\nhold,  as  long  as  their  lifetime  is  alive.   Look  at  the  signature  of  the  functionpush_four.   It  has  a  lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec<i32>.  Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller.  They are “another kind of generics”\n[10], in the sense that they are not run-time variables.  They get instantiated at compile-time, i.e.  when we\ncall a function with a lifetime parameter,  the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter.  In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1.  It is a suitable lifetime for instantiatingpush_four’s lifetime parameter.  One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever,  communication  between  threads  needs  mutation  and  aliasing  together.   As  an  example  consider\naMutex.   
We  need  to  have  references  to  it  in  different  threads,  aliasing,  and  we  need  to  lock  it  in  those\nthreads, mutation.  To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules.  Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few.  To address expressivity besides\nsafety Rust introducesunsafecode, i.e.  code blocks annotated with theunsafekeyword.  The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker,  but  with  some  relaxation.   The  The  Rust  Programming  Language  [10]  book  mentions  five  actions\nyou can take just inunsafecode and calls themunsafe superpowers.  Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn  this  project,  among  primitive  unsafe  constructs,  we  will  initially  focus  on  supportingunsafecode\ninvolvingdereferencing raw pointers.  The two others are used relatively rarely.  Raw pointers are similar to C\npointers.  Rust’s borrow checker does not track them and they can be null or dangling.  Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}.  The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype  system  cannot  check  and  it  is  up  to  the  programmer  to  make  sure  they  have  been  met.   Anunsafe\nfunction’s body is anunsafecode block.  
Usingunsafefunctions propagates theunsafecode to the callers.\n2.1  Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe.  But it should stop at some point.  Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system.  It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety.  It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec<i32>) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec<i32> = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec<i32> = &mut v;//               |\n/***                                              |\n* mrv is a mutable borrow of v                   |\n* as long as this borrow is alive it            [l1]\n* is not possible to access                      |\n* the vector through v                           |\n*/ //                                            |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec<i32> = &v;//                       |\n/***                                              |\n* srv is a shared/immutable borrow of v          |\n* the vector cannot get mutated as long as       |\n* it is borrowed by any immutable borrow         |\n*/ //                                            |\n{//----------------------------------------       |\nlet first: &i32 =//                  |       |\nv.first().unwrap();//            |       |\n/***                                  |      [l2]\n* multiple shared references,        |       |\n* borrowing from the same owner,     |       |\n* can coexist                       [l3]     |\n*/ //       
                         |       |\nprintln!(\"{} is the first in {:?}\",//|       |\nfirst, srv);//                   |       |\n}//----------------------------------------       |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1:  Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2:  A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function.  Such a function would be asafe abstraction.  Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents.  In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type.  However, it is not the case for a safe abstraction.  It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world.  As an example,\nlet us look at Listing 2.  The methodsetis a safe abstraction.  Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell.  While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object.  The code mutates the contents of memory through a shared reference!  It is in contradiction\nwith the core rules of the type system.  Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime.  
But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue.  If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue.  There are other type checks regarding sending\nownership and borrows to other threads.  Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover,  no public method inCelllibrary leaks a reference to the internal state of aCellobject.  That\nprevents sendingdeep  pointersof theCellto other threads.  These together means libraryCellholds the\nfollowing property:  All aliases of aCellobject remain in the same thread.  That would be ourCelllibrary\ninvariant.   The  usage  ofunsafecode  inCelllibrary  is  sound  and  abstracts  away  theunsafeblock.   The\nlibrary  adds  the  functionality  of  mutation  through  shared  reference,  but  because  of  its  invariant,  it  is  still\nsafe.  Safe code can useCellobjects without the necessity of taking care of memory safety.  Our example is\nclose to what the realstd::cell::Cellin the standard library is.  Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants.  Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2  Unsound Unsafe\nNot allunsafeusages are sound.  It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes.  
Listing\n3 shows examples for both cases.  The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3:  Unsoundunsafecode examples\nmemory.  However, it violates the type system guarantees that type checker always assume when it checks safe\ncode.  In such cases, the problem might show up in the execution of safe code.  In general, writing soundunsafe\ncode is very difficult,  especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3  Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically.  But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers.  Programmers who develop these libraries, being human, make\nmistakes.  A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders  all  of  the  type  system’s  guarantees  void.   Here  is  the  point  we  are  targeting  to  contribute  to  Rust\nsafety.  To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types.  The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans.  Here, arises a more fundamental question.  What do Rust types mean?  We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning.  
Fortunately,  we do not\nneed to propose an answer from scratch.  RustBelt [8] already suggests formal semantics for Rust’s types.  In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms.  Later,  in Section 4 we briefly discuss RustBelt [8],  a well-respected work that suggests a formal\nsemantic model for Rust’s types.  Moreover,  we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers.  In Rust,  having a raw\npointer does not guarantee anything about the memory it points to, e.g.  the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque.  That means trying to verify this\nexample  we  would  need  to  checkcreate_deque’s  body  against  fewer  type-induced  proof  obligations  which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1  Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees.  In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation.  Let us assume we chose the most naive solution.  We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value.  Heap is an accounting of the abstract machine’s\nmemory.  
Mathematically, Heap is amultisetof heap chunks.  Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4:  A Deque, implemented just usingunsafeRust\nthat represent information about the memory.  We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms.  We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction.  Store and Heap are empty at the beginning and\nthe  first  statement  islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;.   From  the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1.  The address stored insentinelis notnull\n2.  The address stored insentinelis aligned\n3.  Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4.  Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter  the  execution  of  this  line,  there  are  different  possible  machine  states.   In  one  state,  the  value  in  the\nsentinelcould  benull,  in  another  one0x1000,  and  in  another  one0x12345.   
In  the  states  where  the\nsentinel’s  value  is  notnull,  there  are  chunks,  batches  of  bytes,  allocated  in  Heap  that  our  program  is\nallowed to access.  But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes.  The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values.  To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e.  does not verify the program.\nIt  is  worth  noting  we  do  not  want  to  verify  our  program  against  a  specific  concrete  machine,  and  it\nmeans the set of possible addresses is practically infinite.  Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code.  We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement.  Figure 1 shows theconcrete  execution  treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1)  means  there  is  an  allocated  block  of  memory  starting  from  address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) 
as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1:  The concrete execution tree of functioncreate_dequein Listing 4.  The predicate names have been\nabbreviated  in  this  figure  as  follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv,  and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e.  we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e.  
the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat  the  address  stored  insentinel,  on  this  path0x1.   This  write  is  safe  because  in  our  Heap  we  have  the\npredicateNode\nprev(0x1,h).  After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so.  The next write operation\nis safe similarly.  The final statement isreturn sentinel;.  Representing the return procedure involves many\n8\n\ndetails.  Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example.  Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap.  The execution finished successfully and\nthis path is fine.  Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2  Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one.  To do so we usesymbols instead of\nconcrete  values.   Using  symbols,  we  forget  about  corresponding  concrete  values,  but  we  still  remember  the\nfacts that hold for all of them.  In this text, we typeset symbols likêsym, to make them distinct.  
Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand  also  store  the  facts  we  know  about  it.   We  will  have  a  single  symbolic  execution  path  for  the  case  of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful.  In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced.  To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond.  The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it.  Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution.  It is modelling of\nthe concrete execution.  Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address.  Both symbolic execution paths ofcreate_dequeare safe.  The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3  Modular Symbolic Execution\nThe  preceding  subsection  showed  how  symbolic  execution  algorithm  successfully  verifiescreate_deque.   It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized.  
Now, what if we try to verify a program that callscreate_deque\nseveral times.  Executing the body of functions over and over is a waste.  Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate.  We also like to verify our programs\nin a modular way, e.g.  it is not pleasant to get involved with internal states of callees when we try to verify\na caller.  It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it.  Then instead of executing the body every time the function gets called, we can\nreuse  that  knowledge  to  infer  what  would  be  the  state  of  execution  if  the  call  returns.   This  knowledge  is\ncalledfunction contract.  Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e.  set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould  be  safe.   We  also  want  the  contract  to  tell  us  as  much  as  possible  about  the  effects  that  calling  the\nfunction has on the execution state.  In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions  against  the  function  body/implementation  and  if  the  body  satisfies  the  contract,  we  can  just\nreuse the contract every time we want to check a call to the function.  This contract serves the same purpose\nas  informal  documentation,  written  in  natural  languages.   
But  it  is  comprehensive  and  machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet  us  verify  an  imaginary  call  tocreate_dequewith  the  contract  shown  in  Listing  5,  usingMod-\nular  Symbolic  Execution.    First,  we  should  verify  thatcreate_deque’s  body  satisfies  its  contract.    The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds.   Unsurprisingly,truealways  holds  in  Separation  Logic.   So  there  are  no  special  require-\nments,  i.e.  no Heap chunks or facts about symbols,  to assume when we start to verify the function.  Also,\ncreate_dequehas  no  parameters,  which  means  there  is  nothing  in  the\n̂\nstorewhen  we  start  checking  its\nbody.  We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond.  In this specific\ncase,  we  are  starting  from  the  same  state  as  when  we  were  executing  justcreate_dequesymbolically  and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2:  The symbolic execution tree of functioncreate_dequein Listing 4.  
The execution paths represent\nthe  paths  with  the  same  colour  in  Figure  1.   The  predicate  names  have  been  abbreviated  in  this  figure  as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::<Node>()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::<Node>())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly.   So  the  next  three  lines  would  have  the  same  effect  and  we  do  not  repeat  those  execution\nsteps here.  Although,  there is an interesting difference at the return point.  The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ...,  is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied.  So the\nreturn  point  is  the  point  where  we  should  verify  theensuresclause.   One  of  the  facts  thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap.  The\nresultkeyword  in  theensuresclause  is  a  binder  for  the  return  value  of  the  function,  here,  the  symbolic\nvalue  stored  insentinel,  i.e.\n̂\nl.   To  verify  theensuresclause  weconsumeits  mentioned  chunks  from  the\n̂\nheap.  That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto  the  caller,  we  deprivecreate_dequeof  those  rights  by  removing  the  chunks  from\n̂\nheap.   
It  prevents  us\nfrom transferring access rights of some Heap chunks to the caller twice.  Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check.  The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point.  In our example,  after consuming theensuresclause chunks,\n̂\nheapwould  be  empty.   It  means  we  could  be  sure  thatcreate_dequedoes  not  leak  memory  chunks.   The\ncaller knows  about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code.  Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm.  Now we verified that the contract\nholds.  Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall  site  is  empty.   Bycreate_deque’s  contract,  we  know  it  does  not  need  anything  special  before  calling\nit.  So we are good to go.  We do not look up anything aboutcreate_deque’s body.  The next step of our\nMSE  algorithm  is  to  just  look  upcreate_deque’s  contract  andproducetheensuresclause.   Assuming  we\nrepresent  the  return  value  bŷr,  it  leads  to  addinĝr6=0x0to\n̂\npath\ncondand  adding  the  memory  chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap.  It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4  Modular Symbolic Execution and Verifying Safe Abstractions\nAs  we  mentioned  at  the  beginning  of  this  section  the  Deque  example  is  simple.   That  is  because  first,  its\ninterface is completelyunsafeand second,  it interacts just using raw pointers.  This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves.  
In Rust, many\nfacts about a function’s contract are encoded in the function’s type.  In safe Rust,  the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe  type  checker  assumes  the  body  of  the  function  complies  with  its  type.   For  purely  safe  functions  this\nassumption gets checked during the type checking of the function itself.  When it comes to safe abstractions,\nit  is  the  programmer’s  responsibility  to  make  sure  that  the  function  body  complies  with  its  type.   Instead\nof  verifying  statically  checked  safe  code,  it  is  better  to  just  verify  that  safe  abstractions  bodies  satisfy  the\npropositions encoded in their types.  To verify a function’s body, we start verifying the body from a symbolic\nstate  described  by  the  function’s  contractrequiresclause  and  check  the  validity  of  its  contract’sensures\nclause at its return point(s).  Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8].  In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4  RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust.  They all suggest\ncalculi that capture Rust’s essence.  However, we found RustBelt more suitable for our purposes.  RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not.  
To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe!  That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode.  RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate  Representation(MIR).  Next,  it  provides  a  formal  interpretation  forλ\nRust\n’s  types  and  typing\njudgments in a dialect of Separation Logic, Iris [2].  This interpretation is the semantic model they provide\nforλ\nRust\n’s type system.  Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1.  “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2.  “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours.  This is calledadequacy.”\n3.  “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed  been  safelyencapsulatedby  the  library’s  API.  In  essence,  the  semantic  interpretation  of  the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically  well-typed  programs  are  safe.\nIn  comparison  with  the  syntactic  approach  for  safety  proofs,  i.e.   Progress  and  Preservation,  there  is  an\nindirection in this semantic proof style.  
Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe.  This indirection requires us to define a semantic model and\nmakes the proof longer and harder.  The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed.  This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three.  By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning.  We will talk about the\ndifferences between our approach and RustBelt,  later in the Subsection 5.3.  The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic.  It is a framework\nfor  higher-order  concurrent  separation  logic  that  can  be  used  for  reasoning  about  the  safety  of  concurrent\nprograms.  The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use.  Recall that we are using a dialect of Separation Logic in our MSE as well.  In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1  RustBelt’s semantic model and MSE\nListing  6  shows  the  methodsetof  our  simplifiedCellimplementation  shown  in  Listing  2.   It  has  a\nlifetime  parameter'a,  and  two  normal  parameters.   The  interesting  one  is&'a self.   It  is  a  shorthand\nforself: &'a SelfandSelfin  our  case  isCell.   Our  de-sugared  parameter  would  beself: &'a Cell,\na  parameter  namedselfof  type&'a Cell,  i.e.   a  shared  reference.   
A  reference  type  carries  much  more\ninformation than a raw pointer.self’s type tells us the following.\n1.  Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6:  A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7:  RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2.  The parameterselfcarries an aligned non-null address.\n3.  There are enough bytes to store aCellvalue allocated at the address stored inself.\n4.  There is a validCellvalue stored there.\n5.  The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g.  other threads may read it.\nWe need this information in a formal form.  Let us go through RustBelt’s semantics for this shared pointer\nbriefly.  In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8].  Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit.  Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning.  Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n.  
Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation.  It is shown in Definition 3.  Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library.  To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors.  The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt  is  used  to  interpret  theCelltype.   Let  us  explore  the  information  this  borrow  and  lifetime  logic  rules\nrepresent aboutCell.  We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference.  That happens\nby applying anunsafesuperpower in itssetmethod.  Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races.  To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread.  Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync.  That means they cannot be\naccessed simultaneously from different threads.  In the Rust type system it means values of type&'a Cellare\nnotSend, i.e.  shared references to values of typeCellare not send-able to other threads.  Moreover, no public\nfunction inCellleaks a deep reference to its contents.  These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert.  
A typeτisSync, if and only if, the type of shared references toτ, i.e.  &\nκ\nshr\nτ, isSend.  The fact\n1\nSome details has been dropped for simplicity.  For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references.  It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows.  The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert.  In shortCell’s sharing predicate depends\non  the  thread  identifier.   SinceJ&\nκ\nshr\nτK.own,  shown  in  the  Definition  2,  consists  ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe.  To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow.  In our example,\nthe resources held by a non-atomic persistent borrow.  Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt  describes  how  we  can  get  full  access  to  a  resourcePwhen  we  have  it  under  a  non-atomic  persistent\nborrow.  Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] .  Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access.  But there is more than that about [κ]\nq\nand [Na:t] .  Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing.  That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself.  
To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough.  The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role.  Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n.  The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1.  Access to the resources behind the borrow.  Represented inLftL-na-accbyP.\n2.  Anupdatewhich  takes  back  the  borrowed  resource  and  gives  back  the  lifetime  token  fraction  that\nhad  been  used  when  the  rule  was  applied  to  provide  the  resource.   In  the  case  ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction.  It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ.  Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n.  Those updates always need the resources they have handed out,\nback.  That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat  lifetime  have  been  taken  back.   Intuitively,  the  aliveness  of  a  lifetime  is  a  credit,  we  borrow  access  to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover,  the  rule  requires  the  non-atomic  token  [Na:t],  bound  to  the  same  thread  as  the  non-atomic\npersistent borrow.  “This token is created at the birth of the thread, and threaded through all of its control\nflow.  That is, every function receives it and has to return it.”  [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too.  
It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt.  Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body.  Moreover we\nneed to check the integrity of the type system invariant at return points.  To keep the text concise, we skip the\ndetails.  Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows:  Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter.  The function’s execution\nperiod is a lifetime, always shown by binderF.  Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread.  And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters.  
That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1:  Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo  justify  the  write  inCell::setwe  need  write  permission  for  theCell’s  content.   We  can  get  ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n.  By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access.  But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e.  [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n.  Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a]  we have toLftL-na-acc.  After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ.  
The write operation\nupdates  the  value  of  the  chunk  giving  us  the  updated  resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n.   The  state  is\nshown in the row number 5 of Table 1.  By the next statement,Cell::setreturns.Cell::set’s return type\nis  not  shown  explicitly  which  in  Rust  means  it  is(),  i.e.   the  unit  type.   To  closeJ()K.own(\n̂\nt,[])  does  not\nneed  any  resources  so  we  can  easily  close  it  out  of  thin  air.   There  is  no  destructor  call  happening  here  as\nwell.  As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters.  In the case ofCell::setthere is just'a.  So we\nneed to consume back [̂a]\n̂q\n.  By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller.  Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource.  Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e.  giving it back.  The caller needs to take back the lifetime\ntoken fraction provided to call the current function.  Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n.  Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology,  that means if our MSE algorithm verified a safe abstraction,  there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees.  
In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5  Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs.  There are two important questions needed to be addressed\nregarding our implementation.  First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e.  parameters and local variables, are owned pointers.  We are just showing them here as store variables.\n15\n\n5.1  Executing MIR\nSurface  Rust  has  a  heavily  sugared  syntax  and  there  is  no  formal  operational  semantics  by  the  language\ncommunity for it.  MIR, however, is heavily simplified by the compiler.  In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph.  But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation.   Generic  definitions  are  also  still  in  place  in  MIR.  Therefore,  it  is  much  simpler  and  easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with.  Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing  this  fact.   Moreover,  to  compensate  for  the  lack  of  formal  operational  semantics,  the  language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is.  
That is why we decided to symbolically execute MIR representation in the\nbackground.  To get the MIR representation of a program along with type definitions and user annotations,\nwe  have  implemented  a  Rust  program  which  uses  the  official  Rust  compiler  front-end  to  type  and  borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents  our  tool  to  diverge  from  what  exactly  the  Rust  compiler  is.   If  the  program  passes  the  front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output.  Cap’n Proto is a data interchange format supported in many different programming\nlanguages.  This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2  Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic  propositions  from  scratch.   VeriFast  is  a  research  tool  for  verifying  C  and  Java  programs  annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands.  Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch.  To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining  an  operational  semantics  for  MIR  using  VeriFast’s  C  operational  semantics.   A  similar  process  of\ndefining  operational  semantics  forλ\nRust\nby  translating  it  to  another  language  happens  in  RustBelt.   “The\noperational semantics ofλ\nRust\nis given by translation into a core language.  
The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward.  For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them.  As an example, the approach for translating\ntuples is using Cstructs with reserved names.  For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation  Logic.   The  examples  in  appendix  A  illustrate  our  intention  for  generating  RustBelt  rules  and\npredicates for a safe abstraction\n4\n.\nAt  the  time  of  writing  this  report,  the  tool  can  verify  a  simple  example  of  memory  allocation,  access\nand un-allocation, shown in Figure 3.  Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type.  The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::<u8>()andstd::ptr::mut_ptr::<impl *mut u8>::is_null(*mut u8)respec-\ntively.  Generic definitions are not generally handled yet.  For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust.  The development of VeriFast including the MIR extractor\nprogram  is  being  done  in  branchrustin  a  fork  of  VeriFast  that  can  be  found  athttps://github.com/\nNima-Rahimi-Foroushaani/verifast.  The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607.  
To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3  Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust  programs  withunsafecode.   To  verify  the  safety  of  a  new  library  with  RustBelt  one  would  need  to\nhave considerable knowledge about Iris in the first place.  Moreover,  it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof.  Bart Jacobs.\n16\n\nFigure 3:  The alloc.rs Rust program verified by VeriFast\nsurface  Rust  code  toλ\nRust\n.   After  all,  it  is  just  the  starting  point  to  the  safety  proof  of  the  program.   In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast.  VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end.  That reduces the\nburden of learning for Rust developers who aim to verify their code.  On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e.  verifiable formal documentation, together in the same\nplace.  Our hypothesis is that it leads to a better information encoding scheme for practicality.  Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6  Future Plans\nIn  subsection  5.3,  we  mentioned  some  practical  added  value  for  verifyingunsafeRust  using  VeriFast  in\ncomparison with RustBelt.  
But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n///   size_of::<T>()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n///   size_of::<T>()` bytes must *not* overlap with the region of memory\n///   beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9:  Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas  well  in  the  future.   In  subsection  6.1  we  explain  the  possibilities  of  further  formal  work  to  establish  the\nsoundness of our MSE algorithm.  One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding.  In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1  Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs.  To support\nour soundness claim rigorously,  there are two possible approaches.  One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well.  Another approach is to generate a function-specific Iris proof out of executing\nthe function.  
For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2  Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough,  the first two,  are recommended and common ways of reporting unhappy results,  there are many\nplaces Rust code may panic.  “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12].  A program can recover from panic and handle it using\nstd::panic::catch_unwind.  On the other hand,std::process::abort, immediately terminates the current\nprocess.   In  the  case  of  panic,  the  compiler  takes  care  of  the  safety  and  the  cleaning  up  in  the  unwinding\nexecution path.  Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety.  In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12].  Listing 10 shows an example of such bugs,\ninspired by a real-life one [5].  This kind of bug is hard for a human to track.  Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states.  Fortunately, the bug\nfrom the standard library has been fixed.  But notice that it is a mistake made by experts.  This kind of bug is\nstill showing up now and then in the ecosystem.  That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets.  
While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety.  The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10.  It showsBasic Blockbb8where the call to functionle, i.e.  operator≤gets executed.  One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take.  First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there.  Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account.  However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold.  We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7  Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry.  Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8].  The solid\nformal foundation we are building upon makes our approach very likely to have solid results.  On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end.  VeriFast as a verification software has proven to be useful.  
There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap<T> {\npub data: Vec<T>,\n}\nimpl<T: Ord> BinaryHeap<T> {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::<T>::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10:  An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = <T as PartialOrd>::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community.  Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4]    Yechan  Bae  et  al.  “Rudra:  Finding  Memory  Safety  Bugs  in  Rust  at  the  Ecosystem  Scale”.  In:Pro-\nceedings  of  the  ACM  SIGOPS  28th  Symposium  on  Operating  Systems  Principles.  SOSP  ’21.  Virtual\nEvent,  Germany:  Association  for  Computing  Machinery,  2021,  pp.  84–99.isbn:  9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis  not  exception  safe.  
Rust  issue  #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6]    Bart  Jacobs,  Fr ́ed ́eric  Vogels,  and  Frank  Piessens.  “Featherweight  VeriFast”.  In:Logical  Methods  in\nComputer  Science11.3  (2015).  Ed.  by  Tobias  Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7]    Ralf Jung.MutexGuard<Cell<i32>>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9]    Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10]    Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11]    Yusuke  Matsushita,  Takeshi  Tsukada,  and  Naoki  Kobayashi.  “RustHorn:  CHC-Based  Verification  for\nRust  Programs”.  In:Programming  Languages  and  Systems.  Springer  International  Publishing,  2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12]    Contributions  from  the  Rust  Community.The  Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13]    Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA  Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof.  Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing  VeriFast.   
The  other  example  in  Listing  13  shows  the  outcome  of  our  intended  translation  from  the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12:  ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it.  This example has been provided by Prof.  
Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell<i32> type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell<i32>\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell<i32>, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13:  The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate.  
This example has been provided by Prof.  Bart Jacobs\n22",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2212.12976v1",
+      "updated": "2022-12-26T00:19:19Z",
+      "published": "2022-12-26T00:19:19Z",
+      "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks",
+      "summary": "  Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n",
+      "author": [
+        {
+          "name": "Nima Rahimi Foroushaani"
+        },
+        {
+          "name": "Bart Jacobs"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n  Jacobs",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2212.12976v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2212.12976v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.LO",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": [
+        {
+          "$": {
+            "term": "cs.LO",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        },
+        {
+          "$": {
+            "term": "cs.PL",
+            "scheme": "http://arxiv.org/schemas/atom"
+          }
+        }
+      ]
+    }
+  },
+  "doi_10.1007/978-3-540-71229-9_9": {
+    "path": [
+      "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In  achieving  higher  instruction  level  parallelism,  software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated  schedule may be restricted  to cases  where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1   Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. 
In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. 
Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. 
We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2   Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. 
A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. 
As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3   ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a  valid schedule with  spill code  suitably sched-\nuled satisfying the register and functional resource constraints. 
Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1   Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. 
More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load  decision  variable: We  introduce  load  decision  variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. 
Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2   Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. 
For the ex-\nample shown in Figure   2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. 
For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis  in  the  range  [(T\ndef\ni\n⊕lat\ni\n),  (t\u0004lat\nstore\n)].  There  are  exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure  2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. 
But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. 
Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3   Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4   Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. 
The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5   Experimental Evaluation\n5.1   Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. 
The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2   Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. 
Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. 
The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6   Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. 
Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7   Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. 
The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1.  Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli.  Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan.  Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3.  C.M Chen C.M Chang and C.T King.  Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4.  Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator.  InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5.  ILOG CPLEX:. http://www.ilog.com.\n6.  James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7.  Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique.   InICS ’89:  Proceedings  of  the  3rd  international  conference  on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8.  Paul Feautrier.  Fine-grain scheduling under resource constraints.  InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9.  David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10.  R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11.  
Richard A. Huff.  Lifetime-sensitive modulo scheduling.  InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13.  Trimaran:   An   infrastructure   for   research   in   instruction   level   parallelism.\nhttp://www.trimaran.org.\n14.  M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15.  Josep  Llosa,  Mateo  Valero,  and  Eduard  Ayguade.Heuristics  for  register-\nconstrained software pipelining.  InMICRO 29: Proceedings  of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16.  Qi Ning and Guang R. Gao.  A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT  Symposium  on  Principles  of  Programming  Languages,  pages  29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing.  InMICRO 14:\nProceedings  of the 14th annual workshop  on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18.  B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19.  B.  Ramakrishna  Rau.   Iterative  modulo  scheduling:  an  algorithm  for  software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20.  Kent Wilken, Jack Liu, and Mark Heffernan.  Optimal instruction scheduling us-\ning integer programming.  
InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21.  Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            23
+          ]
+        ],
+        "date-time": "2024-01-23T20:08:48Z",
+        "timestamp": 1706040528010
+      },
+      "publisher-location": "Berlin, Heidelberg",
+      "reference-count": 21,
+      "publisher": "Springer Berlin Heidelberg",
+      "isbn-type": [
+        {
+          "value": "9783540712282",
+          "type": "print"
+        },
+        {
+          "value": "9783540712299",
+          "type": "electronic"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "DOI": "10.1007/978-3-540-71229-9_9",
+      "type": "book-chapter",
+      "created": {
+        "date-parts": [
+          [
+            2007,
+            7,
+            1
+          ]
+        ],
+        "date-time": "2007-07-01T17:39:13Z",
+        "timestamp": 1183311553000
+      },
+      "page": "126-140",
+      "source": "Crossref",
+      "is-referenced-by-count": 11,
+      "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation",
+      "prefix": "10.1007",
+      "author": [
+        {
+          "given": "Santosh G.",
+          "family": "Nagarakatte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "R.",
+          "family": "Govindarajan",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "297",
+      "reference": [
+        {
+          "issue": "6",
+          "key": "9_CR1",
+          "doi-asserted-by": "publisher",
+          "first-page": "180",
+          "DOI": "10.1145/1064978.1065032",
+          "volume": "40",
+          "author": "A. Aleta",
+          "year": "2005",
+          "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "issue": "3",
+          "key": "9_CR2",
+          "doi-asserted-by": "publisher",
+          "first-page": "367",
+          "DOI": "10.1145/212094.212131",
+          "volume": "27",
+          "author": "V.H. Allan",
+          "year": "1995",
+          "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)",
+          "journal-title": "ACM Comput. Surv."
+        },
+        {
+          "issue": "9",
+          "key": "9_CR3",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1016/S0898-1221(97)00184-3",
+          "volume": "34",
+          "author": "C.M. Chen",
+          "year": "1997",
+          "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)",
+          "journal-title": "Computers and Mathematics with Applications"
+        },
+        {
+          "key": "9_CR4",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "174",
+          "DOI": "10.1007/BFb0026430",
+          "volume-title": "Compiler Construction",
+          "author": "K.D. Cooper",
+          "year": "1998",
+          "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)"
+        },
+        {
+          "key": "9_CR5",
+          "unstructured": "ILOG CPLEX: http://www.ilog.com"
+        },
+        {
+          "issue": "1-2",
+          "key": "9_CR6",
+          "doi-asserted-by": "publisher",
+          "first-page": "181",
+          "DOI": "10.1007/BF01205184",
+          "volume": "7",
+          "author": "J.C. Dehnert",
+          "year": "1993",
+          "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)",
+          "journal-title": "J. Supercomput."
+        },
+        {
+          "key": "9_CR7",
+          "doi-asserted-by": "publisher",
+          "first-page": "154",
+          "DOI": "10.1145/318789.318807",
+          "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing",
+          "author": "K. Ebcioglu",
+          "year": "1989",
+          "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807"
+        },
+        {
+          "key": "9_CR8",
+          "series-title": "Lecture Notes in Computer Science",
+          "doi-asserted-by": "publisher",
+          "first-page": "1",
+          "DOI": "10.1007/BFb0025867",
+          "volume-title": "Languages and Compilers for Parallel Computing",
+          "author": "P. Feautrier",
+          "year": "1995",
+          "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)"
+        },
+        {
+          "issue": "8",
+          "key": "9_CR9",
+          "doi-asserted-by": "publisher",
+          "first-page": "929",
+          "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T",
+          "volume": "26",
+          "author": "D.W. Goodwin",
+          "year": "1996",
+          "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)",
+          "journal-title": "Softw. Pract. Exper."
+        },
+        {
+          "issue": "11",
+          "key": "9_CR10",
+          "doi-asserted-by": "publisher",
+          "first-page": "1133",
+          "DOI": "10.1109/71.544355",
+          "volume": "7",
+          "author": "R. Govindarajan",
+          "year": "1996",
+          "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355",
+          "journal-title": "IEEE Transactions on Parallel and Distributed Systems"
+        },
+        {
+          "key": "9_CR11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html",
+          "DOI": "10.1145/173262.155115"
+        },
+        {
+          "key": "9_CR12",
+          "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/"
+        },
+        {
+          "key": "9_CR13",
+          "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org"
+        },
+        {
+          "key": "9_CR14",
+          "doi-asserted-by": "publisher",
+          "first-page": "318",
+          "DOI": "10.1145/53990.54022",
+          "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation",
+          "author": "M. Lam",
+          "year": "1988",
+          "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022"
+        },
+        {
+          "key": "9_CR15",
+          "doi-asserted-by": "publisher",
+          "first-page": "250",
+          "DOI": "10.1109/MICRO.1996.566466",
+          "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture",
+          "author": "J. Llosa",
+          "year": "1996",
+          "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)"
+        },
+        {
+          "key": "9_CR16",
+          "doi-asserted-by": "crossref",
+          "first-page": "29",
+          "DOI": "10.1145/158511.158519",
+          "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages",
+          "author": "Q. Ning",
+          "year": "1993",
+          "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html"
+        },
+        {
+          "key": "9_CR17",
+          "first-page": "183",
+          "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming",
+          "author": "B.R. Rau",
+          "year": "1981",
+          "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)"
+        },
+        {
+          "issue": "7",
+          "key": "9_CR18",
+          "doi-asserted-by": "publisher",
+          "first-page": "283",
+          "DOI": "10.1145/143103.143141",
+          "volume": "27",
+          "author": "B.R. Rau",
+          "year": "1992",
+          "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141",
+          "journal-title": "SIGPLAN Not."
+        },
+        {
+          "key": "9_CR19",
+          "doi-asserted-by": "publisher",
+          "first-page": "63",
+          "DOI": "10.1145/192724.192731",
+          "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture",
+          "author": "B.R. Rau",
+          "year": "1994",
+          "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731"
+        },
+        {
+          "key": "9_CR20",
+          "doi-asserted-by": "publisher",
+          "first-page": "121",
+          "DOI": "10.1145/349299.349318",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "K. Wilken",
+          "year": "2000",
+          "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318"
+        },
+        {
+          "key": "9_CR21",
+          "doi-asserted-by": "publisher",
+          "first-page": "134",
+          "DOI": "10.1145/349299.349319",
+          "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation",
+          "author": "J. Zalamea",
+          "year": "2000",
+          "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319"
+        }
+      ],
+      "container-title": "Lecture Notes in Computer Science",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2020,
+            11,
+            19
+          ]
+        ],
+        "date-time": "2020-11-19T05:17:09Z",
+        "timestamp": 1605763029000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            null
+          ]
+        ]
+      },
+      "ISBN": [
+        "9783540712282",
+        "9783540712299"
+      ],
+      "references-count": 21,
+      "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9",
+      "relation": {}
+    }
+  },
+  "doi_10.1145/512529.512563": {
+    "path": [
+      "cyclone [jendeley doi 10_1145_512529_512563].pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC.  The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety.  In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector.  To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork.  In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]:  Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734;  NSF  grant  9875536;  AFOSR  grants  F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. 
Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission  to  make  digital  or  hard  copies  of  all  or  part  of  this  work  for\npersonal or classroom use is granted  without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page.  To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g.,  memory management).  Thede\nfactolanguage for coding such systems is C.  However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous.  Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems.  Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6,  19],  a\nlanguage designed to be very close to C, but also safe.  We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network,  a web server,\nand many smaller benchmarks. 
In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions.  To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks.  Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone:  its system for preventing dangling-pointer derefer-\nences and space leaks.  The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error.  No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer  annotations  while  supporting  many  C  id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. 
As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based:  each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously.  As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions,  which,  in turn, allows  us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination  of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations.   Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2).  
We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6).  We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system.  It starts with the constructs\nfor creating regions,  allocating objects, and so on — this\npart is simple because the departure from C is small.  We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone.  For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1    Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions,  which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate  new  heap  objects.   Thenewoperation  is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it.  There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). 
However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly.  The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime.  In short, Cyclone local dec-\nlarations  and function parameters  have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement.  The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate  objects into the associated  region.   For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely  when execution leaves the body of the enclosed\nstatement.  Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation  of pointers or the meaning of operators  such as&\nand*.  There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?)  are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions.  
All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2    Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion.  In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations.  Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t  strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ.  The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types.  A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories.  The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region  nameρ\nr\nfor  the created region.   Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing  dangling-pointer  dereferences.To derefer-\nence a pointer, safety demands that its region be live.  Our\ngoal is to determine at compile-time that no code follows\na dangling pointer.  
It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3.     p = &x;\n4.   }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared.  If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work.  Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements.  We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results.  That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array.  These functions all\nexhibit region polymorphism.  Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. 
So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names.  For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region.  In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720.  In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. 
For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n.   Other invariants are possible:  If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters.   For example,  we can  define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3    Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n.  If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n.   Cyclone  supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. 
Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify  outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes.  In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n.  To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4    Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred.   Rather,  we  use a  default completion  of  partial\nprototypes to minimize region annotations.  This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly.  
So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it.  As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way,  the function above,  when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated.  In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it.  The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. 
For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer  must insert  an  explicit region  annotation  to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions;  elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>.  Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion.  In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result.  Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region.  For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment,  and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer.  
This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical  problem that Tofte  and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5].  Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point.  Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability.  Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live.  To this end, func-\ntion  types carry  aneffectthat records the set of  regions\nthe function might access.  The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation.  The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}.   In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. 
As with region names in\nprototypes, the programmer can override the default with\nan explicit effect.  For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables.  Effect variables are used by TT for\nthree purposes:  (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants.  In an explicitly typed language, we found that\nenforcing these constraints was difficult.  Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ.  We do not know where the elements are\nallocated until we instantiateα.  The comparison function\ncmpis used to determine set membership. 
Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1    Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables,  we  use a  type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables.  For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. 
Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSet<int*ρ\n1\n,ρ\n2\n>value\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet<int*ρ\n1\n,ρ\n2\n> build_set(list_t<int*ρ\n1\n,ρ\n2\n>e){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed.  Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2    Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially  bound:  Various objects  of typestruct\nIntFncan instantiateαdifferently.  When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. 
However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{<int*ρ\nL\n> .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n.  The function for the call-back just dereferences\nthe pointer it is passed.  When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects,  we must “leak”\nenough information to prove a call is safe.  Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live.  In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4.    FORMAL SOUNDNESS\nIn a separate technical report [15],  we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory  management,  including  stack  allocation,  dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step,  deterministic rewriting relation\n(→) from  machine  states  to machine  states.   A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v).  
The garbage stack G is\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers.  Note,\nhowever,  that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack.  The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1    Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone.  Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ₁ × τ₂) to model structs. Like structs,\npairs are passed by value (i.e., copied).  We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types.  The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. 
A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region.  Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements.  In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region,  dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ.  This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions.  
Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit.  Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C.  In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages.  Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations,  paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object.  For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2    Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. 
Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe  only  novelty  is  ensuring  thatγestablishes  the  con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. 
In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1  (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed,  and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas.  Here we just sketch two\ncomplications  from  the  proof  of  preservation.   First,  our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]).  
As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas,  for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n.  (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.)  This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation  and  run-time support for  Cyclone\nregions is very simple.   Heap and stack manipulation are\nexactly as in C.  Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object.  If there is insuffi-\ncient space for an object, a new page is allocated.  Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist  pages  are  acquired  from  the collector.   
The  collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved.  The advantage of this interface is its simplicity,\nbut at some cost:  At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump or return, it is easy to\ndetermine statically how many regions should be deallocated\nbefore transferring control. When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler.  We then\ntransfer control with longjmp.  In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6.    EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations.   Just as important,  we have sought to achieve\ngood performance.  In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. 
In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1    Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version.   We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd),  some web utilities (http_get,\nhttp_post, http_ping, and http_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese include cfrac, grobner, and tile. Finally, we ported\nthe compression utilities cacm and ncompress.\nWe took two approaches to porting.  First, we changed\nall the programs as little as possible to make them correct\nCyclone programs.   Then,  for cfrac and mini_httpd, we\nregionized the code:  We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgram    LOC    annotations\nC    Cyc    diffs    total    lines\ncacm    340    360    41    0    0\ncfrac    4218    4215    134    2    2\nfinger    158    161    17    3    3\ngrobner    3260    3401    452    71    40\nhttpget    529    530    44    4    4\nhttpload    2072    2058    121    15    13\nhttpping    1072    1082    33    1    1\nhttppost    607    609    51    8    8\nmatxmult    57    53    11    3    1\nminihttpd    3005    3027    266    4    4\nncompress    1964    1986    134    10    9\ntile    1345    1365    148    2    2\ntotal    18627    18847    1452    124    86\nregionized benchmarks\ncfrac    4218    4192    503    158    107\nminihttpd    3005    2986    531    88    54\ntotal    7223    7178    1034    246    161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation with rnew. We also added\ncompiler-checked  “not  null”  annotations  to  pointer  types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1.   
For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone.  The diffs column\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, the total column is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences of region r\n{s} and rnew. The lines column is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise  from  pointer-syntax alterations.   These  changes  are\ntypically easy to make — e.g., the type of strings is changed\nfrom char * to char ?.   We are currently experimenting\nwith  interpreting char * as a  safe  null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small:  only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode.  The majority of these changes were completely triv-\nial, e.g., many programs required adding ρ_H annotations to\nargv so that arguments could be stored in global variables.\nThe program that required the most changes was grobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstruct type. We therefore parameterized the struct defini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code.  
However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterized struct type, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely.  Because it is event-\nLOC    proto    rnew    region\nstring.h    139    57    0    0\nstring-max.h    139    135    0    0\nstring.cyc    739    68    14    2\nlist.h    364    85    0    0\nlist-max.h    364    171    0    0\nlist.cyc    819    74    38    0\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizing cfrac. As in the\nweb server,  we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation.  We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2    Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. 
As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nThe proto column lists the number of region type annota-\ntions that were necessary in function prototypes; the rnew\ncolumn lists the number of uses of rnew, and the region col-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity.  Library functions that perform allocation often\ncome in two flavors: a heap-allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter.  Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time.  Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions.  
Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTest    C time(s)    Cyclone time\nchecked(s)    factor    unchecked(s)    factor\ncacm    0.12±0.00    0.15±0.00    1.25×    0.14±0.00    1.17×\ncfrac†    2.30±0.00    5.57±0.01    2.42×    4.77±0.01    2.07×\nfinger    0.54±0.42    0.48±0.15    0.89×    0.53±0.16    0.98×\ngrobner†    0.03±0.00    0.07±0.00    2.85×    0.07±0.00    2.49×\nhttpget    0.32±0.03    0.33±0.02    1.03×    0.32±0.06    1.00×\nhttpload†    0.16±0.00    0.16±0.00    1.00×    0.16±0.00    1.00×\nhttpping    0.06±0.02    0.06±0.02    1.00×    0.06±0.01    1.00×\nhttppost    0.04±0.01    0.04±0.00    1.00×    0.04±0.01    1.00×\nmatxmult    1.37±0.00    1.50±0.00    1.09×    1.37±0.00    1.00×\nminihttpd-1.15c    2.05±0.00    2.09±0.00    1.02×    2.09±0.00    1.02×\nncompress-4.2.4    0.14±0.01    0.19±0.00    1.36×    0.18±0.00    1.29×\ntile†    0.44±0.00    0.74±0.00    1.68×    0.67±0.00    1.52×\n† Compiled with the garbage collector\nregionized benchmarks\ncfrac    2.30±0.00    5.22±0.01    2.27×    4.56±0.01    1.98×\nminihttpd    2.30±0.00    2.35±0.00    1.02×    2.35±0.00    1.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes; malloc, realloc, memcpy,\netc., are essentially the only exceptions.\n6.3    Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks.  We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, using gcc 2.96 as a back end. 
The gcc optimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were -O3 -march=i686. Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).¹\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean.  The\nfactor columns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst is grobner, which is almost a factor of three slower\nthan the C version.  We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead  is  not easy  to  measure  in  Cyclone,  because  re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection.  Profiling the garbage-collected version of cfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead.   Partially  regionizing cfrac resulted\nin a 6% improvement. On the other hand, http_load and\ntile make relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead.  Therefore, we\n¹ The semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2.  
This is a measure\nof  variability,  similar  to  standard  deviation,  recommended  by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected.  We found that a major cost is due to the repre-\nsentation of fat pointers.  A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]).  The result is a\nlarger space overhead, larger cache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone  programs.   We  found  that  slight  changes  to  fat\npointer operations and gcc flags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7.    RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion  allocation.   We feel  that Cyclone  is  a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking  C  Safe. Many  systems,  including  but certainly\nnot limited to LCLint [10,  9],  SLAM [3],  Safe-C [2],  and\nCCured [25], aim to make C code safe.  Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools.  
Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot  eliminate  the checks.   The  primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone.  However, they do not pre-\nserve the same data representations and lifetimes  for ob-\njects.  (Cyclone’s τ? pointers also use a wide representa-\ntion,  but the  use  of  these  pointers  is  under programmer\ncontrol.)   Furthermore,  memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible.  Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced.  Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks.  Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic  Regions. Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27].  Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27].  
More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types.  All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectors within a language [31, 23]. However, these\nsystems  were  not  designed  for  source-level  programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17], aim to provide safe source-level control over\nmemory management using regions.  Vault’s powerful type\nsystem allows  a region  to be freed before  it leaves scope\nand its types can enforce that code must free a region.  To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior.  However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used.  
For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions  in  C. Perhaps the most closely  related  work  is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has no a priori restrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations.  These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting.  One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions.  In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain.  Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC: grobner and tile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the ML Kit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8.    
FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early.  We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations,  and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas.  For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks.   We are currently working to\nidentify the bottlenecks,  but a clear problem is with  our\nrepresentation  of  pointers  to  dynamically  sized  arrays  (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly,  to  support  type-safe,  discriminated  unions,  we\nadd implicit tags.  We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. 
We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations.  How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1]  A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2]  T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3]  T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4]  H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5]  K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6]  Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7]  R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8]  T. Dowd, F. Henderson, and P. 
Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9]  D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10]  D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11]  D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12]  D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13]  A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14]  D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16]  N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17]  F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18]  R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19]  T. Jim, G. 
Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20]  G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21]  Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22]  J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23]  S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24]  G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25]  G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26]  M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27]  M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28]  M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29]  D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30]  D. Walker and K. Watkins. 
On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31]  D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32]  H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33]  H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            29
+          ]
+        ],
+        "date-time": "2024-01-29T15:59:19Z",
+        "timestamp": 1706543959870
+      },
+      "publisher-location": "New York, NY, USA",
+      "reference-count": 32,
+      "publisher": "ACM",
+      "content-domain": {
+        "domain": [
+          "dl.acm.org"
+        ],
+        "crossmark-restriction": true
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "DOI": "10.1145/512529.512563",
+      "type": "proceedings-article",
+      "created": {
+        "date-parts": [
+          [
+            2004,
+            4,
+            19
+          ]
+        ],
+        "date-time": "2004-04-19T17:18:43Z",
+        "timestamp": 1082395123000
+      },
+      "update-policy": "http://dx.doi.org/10.1145/crossmark-policy",
+      "source": "Crossref",
+      "is-referenced-by-count": 229,
+      "title": "Region-based memory management in cyclone",
+      "prefix": "10.1145",
+      "author": [
+        {
+          "given": "Dan",
+          "family": "Grossman",
+          "sequence": "first",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Greg",
+          "family": "Morrisett",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Trevor",
+          "family": "Jim",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "AT&amp;T Labs Research, Florham Park, NJ"
+            }
+          ]
+        },
+        {
+          "given": "Michael",
+          "family": "Hicks",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "Yanling",
+          "family": "Wang",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        },
+        {
+          "given": "James",
+          "family": "Cheney",
+          "sequence": "additional",
+          "affiliation": [
+            {
+              "name": "Cornell University, Ithaca, NY"
+            }
+          ]
+        }
+      ],
+      "member": "320",
+      "published-online": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "reference": [
+        {
+          "key": "e_1_3_2_1_1_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "e_1_3_2_1_2_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/178243.178446"
+        },
+        {
+          "key": "e_1_3_2_1_3_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/380921.380932"
+        },
+        {
+          "key": "e_1_3_2_1_4_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1002/spe.4380180902"
+        },
+        {
+          "key": "e_1_3_2_1_5_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1999.2829"
+        },
+        {
+          "key": "e_1_3_2_1_6_1",
+          "volume-title": "Technical Report 2001-1855",
+          "year": "2001",
+          "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/"
+        },
+        {
+          "key": "e_1_3_2_1_7_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378811"
+        },
+        {
+          "key": "e_1_3_2_1_8_1",
+          "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability",
+          "volume": "59",
+          "author": "Dowd T.",
+          "year": "2001",
+          "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_9_1",
+          "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/  D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/"
+        },
+        {
+          "key": "e_1_3_2_1_10_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/231379.231389"
+        },
+        {
+          "key": "e_1_3_2_1_11_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/277650.277748"
+        },
+        {
+          "key": "e_1_3_2_1_12_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378815"
+        },
+        {
+          "key": "e_1_3_2_1_13_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360228"
+        },
+        {
+          "key": "e_1_3_2_1_14_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/645396.651967"
+        },
+        {
+          "key": "e_1_3_2_1_16_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/512529.512547"
+        },
+        {
+          "key": "e_1_3_2_1_17_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/773184.773203"
+        },
+        {
+          "key": "e_1_3_2_1_18_1",
+          "volume-title": "The Art of Computer Systems Performance Analysis",
+          "author": "Jain R.",
+          "year": "1991",
+          "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991"
+        },
+        {
+          "key": "e_1_3_2_1_19_1",
+          "volume-title": "USENIX Annual Technical Conference",
+          "author": "Jim T.",
+          "year": "2002",
+          "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002"
+        },
+        {
+          "key": "e_1_3_2_1_20_1",
+          "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html  G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html"
+        },
+        {
+          "key": "e_1_3_2_1_21_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/237721.237791"
+        },
+        {
+          "key": "e_1_3_2_1_22_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/44501.45065"
+        },
+        {
+          "key": "e_1_3_2_1_23_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/378795.378817"
+        },
+        {
+          "key": "e_1_3_2_1_24_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.5555/647228.719245"
+        },
+        {
+          "key": "e_1_3_2_1_25_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/503272.503286"
+        },
+        {
+          "key": "e_1_3_2_1_26_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/291891.291894"
+        },
+        {
+          "key": "e_1_3_2_1_27_1",
+          "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report",
+          "author": "Tofte M.",
+          "year": "2001",
+          "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001"
+        },
+        {
+          "key": "e_1_3_2_1_28_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1006/inco.1996.2613"
+        },
+        {
+          "key": "e_1_3_2_1_29_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/363911.363923"
+        },
+        {
+          "key": "e_1_3_2_1_30_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/507635.507658"
+        },
+        {
+          "key": "e_1_3_2_1_31_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/360204.360218"
+        },
+        {
+          "key": "e_1_3_2_1_32_1",
+          "first-page": "375",
+          "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science",
+          "author": "Xi H.",
+          "year": "2000",
+          "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000"
+        },
+        {
+          "key": "e_1_3_2_1_33_1",
+          "doi-asserted-by": "publisher",
+          "DOI": "10.1145/292540.292560"
+        }
+      ],
+      "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation",
+      "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation",
+      "original-title": [],
+      "link": [
+        {
+          "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563",
+          "content-type": "unspecified",
+          "content-version": "vor",
+          "intended-application": "similarity-checking"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2023,
+            9,
+            4
+          ]
+        ],
+        "date-time": "2023-09-04T21:19:02Z",
+        "timestamp": 1693862342000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://dl.acm.org/doi/10.1145/512529.512563"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "references-count": 32,
+      "alternative-id": [
+        "10.1145/512529.512563",
+        "10.1145/512529"
+      ],
+      "URL": "http://dx.doi.org/10.1145/512529.512563",
+      "relation": {
+        "is-identical-to": [
+          {
+            "id-type": "doi",
+            "id": "10.1145/543552.512563",
+            "asserted-by": "object"
+          }
+        ]
+      },
+      "published": {
+        "date-parts": [
+          [
+            2002,
+            5,
+            17
+          ]
+        ]
+      },
+      "assertion": [
+        {
+          "value": "2002-05-17",
+          "order": 2,
+          "name": "published",
+          "label": "Published",
+          "group": {
+            "name": "publication_history",
+            "label": "Publication History"
+          }
+        }
+      ]
+    }
+  },
+  "arxiv_1704.04861": {
+    "path": [
+      "mobilenet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications.  MobileNets\nare  based  on  a  streamlined  architecture  that  uses  depth-\nwise  separable  convolutions  to  build  light  weight  deep\nneural  networks.   We  introduce  two  simple  global  hyper-\nparameters  that  efficiently  trade  off  between  latency  and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non  the  constraints  of  the  problem.   We  present  extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse  cases  including  object  detection,  finegrain  classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin  computer  vision  ever  since  AlexNet  [19]  popularized\ndeep  convolutional  neural  networks  by  winning  the  Ima-\ngeNet Challenge:  ILSVRC 2012 [24].   The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8].  How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed.   
In  many  real  world  applications  such  as  robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis  paper  describes  an  efficient  network  architecture\nand  a  set  of  two  hyper-parameters  in  order  to  build  very\nsmall, low latency models that can be easily matched to the\ndesign  requirements  for  mobile  and  embedded  vision  ap-\nplications.  Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof  different  applications  and  use  cases.   Section  5  closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g.  [16, 34,\n12,  36,  22].   Many  different  approaches  can  be  generally\ncategorized into either compressing pretrained networks or\ntraining  small  networks  directly.    This  paper  proposes  a\nclass  of  network  architectures  that  allows  a  model  devel-\noper  to  specifically  choose  a  small  network  that  matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks.  Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks.  
Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections.  Subsequently, the Xception network [3] demon-\nstrated  how  to  scale  up  depthwise  separable  filters  to  out\nperform Inception V3 networks.  Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA  different  approach  for  obtaining  small  networks  is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36],  hashing\n1\narXiv:1704.04861v1  [cs.CV]  17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature.  Additionally var-\nious  factorizations  have  been  proposed  to  speed  up  pre-\ntrained  networks  [14,  20].    Another  method  for  training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network.   It is complementary to\nour  approach  and  is  covered  in  some  of  our  use  cases  in\nsection 4.  Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet  is  built  on  which  are  depthwise  separable  filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. 
Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions  which  is  a  form  of  factorized  convolutions\nwhich  factorize  a  standard  convolution  into  a  depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution.   For  MobileNets  the  depthwise  convolution  ap-\nplies a single filter to each input channel.   The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer  for  combining.   This  factorization  has  the  effect  of\ndrastically reducing computation and model size.  Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA  standard  convolutional  layer  takes  as  input  aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe  standard  convolutional  layer  is  parameterized  by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe  number  of  input  channelsM,  the  number  of  output\nchannelsNthe kernel 
sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n.  MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining  features  in  order  to  produce  a  new  representation.\nThe  filtering  and  combination  steps  can  be  split  into  two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square.  Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise  separable  convolution  are  made  up  of  two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth).  Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer.  MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis  the  depthwise  convolutional  kernel  of  size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution.   However it only filters input chan-\nnels, it does not combine them to create new features.  
So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe  combination  of  depthwise  convolution  and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a)  Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b)  Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers:  depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution.  By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. 
The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification.  Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer.   A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds.  It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left:  Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity.  Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented  with  highly  optimized  general  matrix  multiply\n(GEMM) functions.   Often convolutions are implemented\nby  a  GEMM  but  require  an  initial  reordering  in  memory\ncalled im2col in order to map it to a GEMM. 
For instance,\nthis  approach  is  used  in  the  popular  Caffe  package  [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2.  Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet  models  were  trained  in  TensorFlow  [1]  us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar  to  Inception  V3  [31].   However,  contrary  to  training\nlarge models we use less regularization and data augmen-\ntation  techniques  because  small  models  have  less  trouble\nwith overfitting.  When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31].  Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough  the  base  MobileNet  architecture  is  already\nsmall  and  low  latency,  many  times  a  specific  use  case  or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. 
MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is  the  baseline  MobileNet  andα <1are\nreduced MobileNets.  Width multiplier has the effect of re-\nducing  computational  cost  and  the  number  of  parameters\nquadratically by roughlyα\n2\n.  Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy,  latency and size trade off.   It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4.  Resolution  Multiplier:   Reduced  Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote  that  each  row  is  a  cumulative  effect  adding  on  top  of  the\nprevious  row.   
This  example  is  for  an  internal  MobileNet  layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets.  Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs  an  example  we  can  look  at  a  typical  layer  in  Mo-\nbileNet  and  see  how  depthwise  separable  convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer.  The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512.  We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers.   We  then  show  the  trade  offs  of  reducing  the  net-\nwork based on the two hyper-parameters:  width multiplier\nand resolution multiplier and compare results to a number\nof popular models. 
We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions.  In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe  next  show  results  comparing  thinner  models  with\nwidth multiplier to shallower models using less layers.  To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters,  that  making  MobileNets  thinner  is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα.  Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs  for  different  resolution  multipliers  by  training  Mo-\nbileNets  with  reduced  input  resolutions.   
Accuracy  drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy  and  computation  for  the  16  models  made  from  the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure  4.  This  figure  shows  the  trade  off  between  computation\n(Mult-Adds) and accuracy on the ImageNet benchmark.  Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions.  The number of parameters do not vary\nbased on the input resolution.\nFigure  5  shows  the  trade  off  between  ImageNet  Ac-\ncuracy   and   number   of   parameters   for   the   16   models\nmade  from  the  cross  product  of  width  multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable   8   compares   full   MobileNet   to   the   original\nGoogleNet  [30]  and  VGG16  [27].    MobileNet  is  nearly\nas  accurate  as  VGG16  while  being  32  times  smaller  and\n27 times less compute intensive.  It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. 
MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable  11.  Performance  of  PlaNet  using  the  MobileNet  architec-\nture.  Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7]  PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web.  We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10.  MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos.  PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds.  
The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact.  Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother  use-case  for  MobileNet  is  compressing  large\nsystems with unknown or esoteric training procedures.  In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na  knowledge  transfer  technique  for  deep  networks.    We\nseek  to  reduce  a  large  face  attribute  classifier  with75\nmillion  parameters  and1600million  Mult-Adds.The\nclassifier  is  trained  on  a  multi-attribute  dataset  similar  to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture.   Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets.  Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno  regularization  (e.g.   weight-decay  and  early-stopping),\nbut  also  demonstrates  enhanced  performances.   It  is  evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking:  it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge  [10].   
In  table  13,  MobileNet  is  compared  to  VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600).  The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image.  The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe  emulation  quality  is  measured  by  averaging  the  per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture.  Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds  Parameters\n1.0 MobileNet-224   88.7%5683.2\n0.5 MobileNet-224   88.1%1490.8\n0.25 MobileNet-224  87.2%450.2\n1.0 MobileNet-128   88.1%1853.2\n0.5 MobileNet-128   87.7%480.8\n0.25 MobileNet-128  86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures.  mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds  Parameters\ndeeplab-VGG  21.1%34.933.1\nSSD 300Inception V2   22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2   15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2   21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure  6.  Example  objection  detection  results  using  MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25].  It builds face embeddings based on the triplet\nloss.  
To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe  proposed  a  new  model  architecture  called  Mo-\nbileNets  based  on  depthwise  separable  convolutions.   We\ninvestigated some of the important design decisions leading\nto an efficient model.  We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks.  As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1]  M.  Abadi,  A.  Agarwal,  P.  Barham,  E.  Brevdo,  Z.  Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2]  W.  Chen,  J.  T.  Wilson,  S.  Tyree,  K.  Q.  Weinberger,  and\nY.  Chen.    Compressing  neural  networks  with  the  hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3]  F. Chollet.  Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4]  M. Courbariaux, J.-P. David, and Y. Bengio.  Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5]  S. Han, H. Mao, and W. J. Dally.  
Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6]  J. Hays and A. Efros.   IM2GPS: estimating geographic in-\nformation from a single image.  InProceedings of the IEEE\nInternational  Conference  on  Computer  Vision  and  Pattern\nRecognition, 2008. 7\n[7]  J. Hays and A. Efros.   Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland,  editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8]  K. He, X. Zhang, S. Ren, and J. Sun.  Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9]  G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10]  J.  Huang,  V.  Rathod,  C.  Sun,  M.  Zhu,  A.  Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy  trade-offs  for  modern  convolutional  object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11]  I.  Hubara,  M.  Courbariaux,  D.  Soudry,  R.  El-Yaniv,  and\nY. Bengio.  Quantized neural networks: Training neural net-\nworks  with  low  precision  weights  and  activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12]  F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer.  Squeezenet:  Alexnet-level accuracy\nwith  50x  fewer  parameters  and¡  1mb  model  size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13]  S. Ioffe and C. Szegedy.  Batch normalization: Accelerating\ndeep  network  training  by  reducing  internal  covariate  shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14]  M. Jaderberg, A. Vedaldi, and A. Zisserman.   Speeding up\nconvolutional  neural  networks  with  low  rank  expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick,  S.  Guadarrama,  and  T.  
Darrell.Caffe:   Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16]  J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17]  A.  Khosla,  N.  Jayadevaprakash,  B.  Yao,  and  L.  Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop  on  Fine-Grained  Visual  Categorization,   IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\nColorado Springs, CO, June 2011. 6\n[18]  J.  Krause,   B.  Sapp,   A.  Howard,   H.  Zhou,   A.  Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei.  The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19]  A.  Krizhevsky,  I.  Sutskever,  and  G.  E.  Hinton.   Imagenet\nclassification with deep convolutional neural networks.   In\nAdvances  in  neural  information  processing  systems,  pages\n1097–1105, 2012. 1, 6\n[20]  V.  Lebedev,   Y.  Ganin,   M.  Rakhuba,   I.  Oseledets,   and\nV.   Lempitsky.Speeding-up   convolutional   neural   net-\nworks  using  fine-tuned  cp-decomposition.arXiv  preprint\narXiv:1412.6553, 2014. 2\n[21]  W. Liu,  D. Anguelov,  D. Erhan,  C. Szegedy,  and S. Reed.\nSsd:Single   shot   multibox   detector.arXiv   preprint\narXiv:1512.02325, 2015. 7\n[22]  M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23]  S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks.  In\nAdvances  in  neural  information  processing  systems,  pages\n91–99, 2015. 7\n\n[24]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,\nS.  Ma,  Z.  Huang,  A.  Karpathy,  A.  Khosla,  M.  
Bernstein,\net  al.Imagenet  large  scale  visual  recognition  challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25]  F. Schroff, D. Kalenichenko, and J. Philbin.  Facenet: A uni-\nfied embedding for face recognition and clustering.  InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26]  L.  Sifre.Rigid-motion  scattering  for  image  classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27]  K.  Simonyan  and  A.  Zisserman.   Very  deep  convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28]  V. Sindhwani, T. Sainath, and S. Kumar.   Structured trans-\nforms  for  small-footprint  deep  learning.InAdvances  in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29]  C.  Szegedy,  S.  Ioffe,  and  V.  Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30]  C.   Szegedy,   W.   Liu,   Y.   Jia,   P.   Sermanet,   S.   Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference  on  Computer  Vision  and  Pattern  Recognition,\npages 1–9, 2015. 6\n[31]  C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking  the  inception  architecture  for  computer  vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32]  B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD.  Poland,  D.  Borth,  and  L.-J.  Li.    Yfcc100m:   The  new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33]  T.  Tieleman  and  G.  Hinton.   Lecture  6.5-rmsprop:  Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34]  M. Wang, B. Liu, and H. Foroosh.  Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 
1\n[35]  T. Weyand, I. Kostrikov, and J. Philbin.  PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36]  J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng.  Quantized\nconvolutional  neural  networks  for  mobile  devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37]  Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang.  Deep fried convnets.  InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1704.04861v1",
+      "updated": "2017-04-17T03:57:34Z",
+      "published": "2017-04-17T03:57:34Z",
+      "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n  Applications",
+      "summary": "  We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n",
+      "author": [
+        {
+          "name": "Andrew G. Howard"
+        },
+        {
+          "name": "Menglong Zhu"
+        },
+        {
+          "name": "Bo Chen"
+        },
+        {
+          "name": "Dmitry Kalenichenko"
+        },
+        {
+          "name": "Weijun Wang"
+        },
+        {
+          "name": "Tobias Weyand"
+        },
+        {
+          "name": "Marco Andreetto"
+        },
+        {
+          "name": "Hartwig Adam"
+        }
+      ],
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1704.04861v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1704.04861v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "path_onnx loop [jendeley no id].pdf": {
+    "path": [
+      "onnx loop [jendeley no id].pdf"
+    ],
+    "title": "onnx loop [jendeley no id].pdf",
+    "idType": "path",
+    "tags": [],
+    "authors": [],
+    "comments": "",
+    "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n  Operator inputs defined as (max_trip_count, condition_var).\n  input (\"\", \"\"):\n      for (int i=0; ; ++i) {\n        cond = ... // Note this value is ignored, but is required in \nthe body\n      }\n  input (\"\", cond) // Note this is analogous to a while loop\n      bool cond = ...;\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (\"\", 1) // Note this is analogous to a do-while loop\n      bool cond = true\n      for (int i=0; cond; ++i) {\n        cond = ...;\n      }\n  input (trip_count, \"\") // Note this is analogous to a for loop\n      int trip_count = ...\n      for (int i=0; i < trip_count; ++i) {\n        cond = ...; // ignored\n      }\n  input (trip_count, cond)\n      int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n      bool cond = ...;\n      for (int i=0; i < trip_count && cond; ++i) {\n        cond = ...;\n      }\n  graph predict-net {\n    %a = Constant[value = <Scalar Tensor [3]>]()\n    %b = Constant[value = <Scalar Tensor [6]>]()\n    %keepgoing = Constant[value = <Scalar Tensor [1]>]()\n    %max_trip_count = 
Constant[value = <Scalar Tensor [10]>]()\n    %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph \nbody-net>](%max_trip_count, %keepgoing, %b)\n    return\n  }\n  graph body-net (\n    %i[INT32, scalar]           // iteration number\n    %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n    %b_in[INT32, scalar]        // incoming value of loop-carried-\ndependency b\n  ) {\n    %my_local = Add(%a, %b_in)\n    %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n    %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n    %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n    return %keepgoing_out, %b_out, %user_defined_val\n  }\n  {\n    /* User-defined code (enclosing scope) */\n    int a = 3, b = 6;\n    bool keepgoing = true; // Analogous to input cond\n    /* End user-defined code */\n    /* Implicitly-defined code */\n    const int max_trip_count = 10; // Analogous to input M\n    int user_defined_vals[]; // Imagine this is resizable\n    /* End implicitly-defined code */\n    /* initialize loop-carried variables and scan-output variables */\n    bool keepgoing_out = keepgoing\n    int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. 
The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n    for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n      /* Implicitly-defined code: bind actual parameter values\n         to formal parameter variables of loop-body */\n      bool keepgoing_in = keepgoing_out;\n      bool b_in = b_out;\n      /* User-defined code (loop body) */\n      int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n      b_out = a - b_in;\n      keepgoing_out = my_local > b_out;\n      user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n      /* End user-defined code */\n      /* Implicitly defined-code */\n      user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n    }\n    // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n    // The values below are bound to the output variables of the loop \nand therefore accessible\n    // b_out; user_defined_vals; keepgoing_out;\n  }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). 
Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. 
Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, 
apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21"
+  },
+  "doi_10.1006/inco.1996.2613": {
+    "path": [
+      "region-based-memory-management.pdf"
+    ],
+    "idType": "doi",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2082 . Length:   58 pic  2 pts,    245 mm\nInformation and Computation  \u0015  IC2613\ninformation  and computation132,  109\u0015176  (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer  Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes  and CNRS  URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis  paper  describes  a  memory  management  discipline  for  programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues  are  put  intoregions.  The  store  consists of  a stack  of regions.  All\npoints  of  region  allocation  and  de-allocation  are  inferred  automatically,\nusing  a  type  and  effect  based  program  analysis.  The  scheme  does  not\nassume   the   presence   of   a  garbage  collector.  The  scheme  was   first\npresented  in  1994  (M.  Tofte  and  J.-P.  Talpin,in``Proceedings  of  the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,''  pp. 188\u0015201);  subsequently,  it  has  been  tested  in  The  ML\nKit with Regions, a region-based, garbage-collection free implementation\nof  the Standard  ML Core  language,  which includes  recursive datatypes,\nhigher-order  functions  and  updatable  references  L.  Birkedal,  M.  Tofte,\nand M. Vejlstrup,  (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT    Symposium    on    Principles    of    Programming    Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na  standard  semantics.  
We  conclude  by  giving  some  advice  on  how  to\nwrite  programs  that  run  well  on  a  stack  of  regions,  based  on  practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights  of reproduction  in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages,  Portland,  Oregon,  January  1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3429 Signs:  2963 . Length:   52 pic 10 pts,   222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The   target   language,   TExp.   4.1. Dynamic   semantics   for   target.   4.2. Example:   function   values.\n4.3. Example:  region  polymorphism.  4.4. Design  choises.  4.5. Properties  of  region-based  evaluation.\n4.6 Syntactic  equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type  system. 5.4. Substitution lemma.\n6.Using  effects to describe  continuations.\n7.Consistency.\n8.Properties  of  consistency.  8.1. Rule-based  co-induction.  8.2. Preservation  of  consistency.  8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof  of  the correctness  of the  translation.\n10.Algorithms.\n11.Language  extensions. 11.1. References.  11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order  functions.  12.2. Larger  benchmarks.  12.3.  Automatic  program  transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1.  INTRODUCTION\nComputers  have  finite  memory.  
Very  often,  the  total  memory  allocated  by  a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus,  a  practical  discipline  of  programming  must  provide  some  form  of  memory\nrecycling.\nOne  of  the  key  achievements  of  early  work  in  programming  languages  was  the\ninvention  of  the  notion  of  block  structure  and  the  associated  implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages,  every point  of allocation is  matched by  a point  of de-alloca-\ntion  and  these  points  can  easily  be  identified  in  the  source  program  (Naur,  1963;\nDijkstra,  1960).  Properly  used,  the  stack  discipline  can  result  in  very  efficient  use\nof  memory,  the  maximum  memory  usage  being  bounded  by  the  depth  of  the  call\nstack rather  than the  number of  memory allocations.\nThe  stack  discipline  has  its  limitations,  however,  as  witnessed  by  restrictions  in\nthe  type  systems  of  block-structured  languages.  For  example,  procedures  are  typi-\ncally  prevented  from  returning  lists  or  procedures  as  results.  There  are  two  main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen  space  for  that  value  is  allocated.  This  allows,  for  example,  arrays  which  are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by  contrast,  it  is not  in general  possible to  determine how big a list is going\nto become,  when  generation of  the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe  allocation  and  de-allocation  scheme  associated  with  block  structure.  When\nprocedures  are  values,  there  is  a  danger  that  a  procedure  value  refers  to  values\nwhich have  been de-allocated.  
For example,  consider the following  program:\n110\nTOFTE  AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3887 Signs:  3130 . Length:   52 pic 10 pts,   222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis  expression  is  an  application  of a  function  (denoted  by(let}}}end))  to  the\nnumber  5.  The  function  has  formal  parameteryand  body(*1x,y),  where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)),  wherexis bound to the pair\n(2, 3),  so  that  the  whole  expression  evaluates  to  the  pair  (2, 5).  However,  if  we\nregard  thelet}}}endconstruct  as  a  block  construct  (rather  than  just  a  lexical\nscope),  we  see  why  a  stack-based  implementation  would  not  work:  we  cannot  de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function  which is  returned by the entireletexpression.\nOne  way  to  ease  the  limitations  of  the  stack  discipline  is  to  allow  programmer\ncontrolled  allocation  and  de-allocation  of  memory,  as  is  done  in  C.  (C  has  two\noperations,mallocandfree,   for  allocation  and  de-allocation,  respectively.)\nUnfortunately,  it is  in  general very  hard for  a programmer  to  know when  a block\nof  memory  does  not  contain  any  live  values  and  may  therefore  be  freed;  conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional  languages  (such  as  Haskell  and  Standard  ML)  and  some  object-\noriented  languages  (e.g.,   JAVA)  instead  let  a  separate  routine  in  the  runtime\nsystem,  thegarbage  collector,  take  care  of  de-allocation  of  memory  [3; 14; 15].\nAllocation  is  done  by  the  program,  often  at  a  very  high  rate.  
In  our  example,  the\nthree  expressions(2, 3),(fnyO(*1x,y)),   and(*1x,y)each  allocate\nmemory  each  time  they  are  evaluated.  The  part  of  memory  used  for  holding  such\nvalues  is  called  theheap; the ro^ le  of the garbage  collector is  to recycle  those  parts\nof the heap that hold only dead values,  i.e.,  values which are of no consequence to\nthe rest  of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed,  there  is  a  much  quoted  argument  that  the  amortized  cost  of  copying  gar-\nbage  collection  tends  to  zero  as  memory  tends  to  infinity  [2, p. 206].  It  is not  the\ncase,  however,  that  languages  such  as  Standard  ML  free  the  programmer  com-\npletely  from  having  to  worry  about  memory  management.  To  write  efficient  SML\nprograms,  one  must  understand  the  potential  dangers  of,  for  example,  accidental\ncopying or survival of large data structures. If a program is written without concern\nfor  space  usage,  it  may  well  use  much  more  memory  than  one  would  like;  even  if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a  space-efficient one may  require major changes to the code.\nThe  purpose  of  the  work  reported  in  this  paper  is  to  advocate  a  compromise\nbetween  the  two  extremes  (completely  manual  vs  completely  automatic  memory\nmanagement).  We  propose  a  memory  model  in  which  memory  can  be  thought  of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes:  2641 Signs:  1587 . Length:   52 pic 10 pts,   222 mm\nFIG.  
1.The  store  is  a  stack  of  regions;  every  region  is  uniquely  identified  by  aregion  name\n(e.g.,r\n0\n)  and  is depicted by a box in the picture.\nstack.  For  example,  a  typical  use  of  a  region  is  to  hold  a  list.  A  program  analysis\nautomatically  identifies  program  points  where  entire  regions  can  be  allocated  and\nde-allocated  and  decides,  for  each  value-producing  expression,  into  which  region\nthe value  should be put.\nMore  specifically,  we  translate  every  well-typed  source  language  expression,e,\ninto  a  target  language  expression,e$,  which  is  identical  withe,  except  for  certain\nregion  annotations.  The  evaluation  ofe$  corresponds,  step  for  step,  to  the  evalua-\ntion ofe.  Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program  which corresponds  to  the above  source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE  AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3877 Signs:  3467 . 
Length:   52 pic 10 pts,   222 mm\nWe  shall  step  through  the  evaluation  of  this  expression  in  detail  in  Section 4.\nBriefly,  evaluation  starts  in  a  region  stack  with  three  regions  (\\\n1\n,\\\n2\n,  and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n,  and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n,  and\\\n3\ncontain the final result.\nThe  scheme  forms  the  basis  of  the  ML  Kit  with  Regions,  a  compiler  for  the\nStandard  ML  Core   language,   including  higher-order   functions,   references  and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes  only.  A  solution  to  the  other  problem,  handling  values  of  unknown  size,  is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions,  whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology,  region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but  beware  that  the  analysis  has  only  been  developed  in  the  context  of\nStandard  ML  and  relies  on  the  fact  that  SML  is  rather  more  strongly  typed  than\nC. For a strongly  typed  imperative language  like JAVA,  region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare  interested  in  code  generation,  Appendix A  shows  the  three-address  program\nwhich the  ML  Kit  produces  from  the  above  program,  using  both  region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the  semantics of regions,  not  their implementation.\nExperience  with  the  Kit  is  that,  properly  used,  the  region  scheme  is  strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared  to  a  garbage-collected  system  [5].  
We  have  found  that  most  of  the\nallocation  is  handled  well  by  the  automatic  region  analysis;  occasionally  it  is  too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer  does  not  know  the  region  inference  rules;  for  now,  we  have  chosen\ninstead  to  make  (usually  small)  transformations  to  the  source  programs  to  make\nthem  more  ``region  friendly.''  We  shall  describe  some  of  those  transformations\ntowards the end  of this paper.\nA  very  important  property  of  our  implementation  scheme  is  that  programs  are\nexecuted  ``as  they  are  written'',  with  no  additional  costs  of  unbounded  size  (see\nAppendix A for a detailed example). The memory management directives which are\ninserted  are  each  constant  time  operations.  This  opens  up  the  possibility  of  using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe  key  problem  which  is  addressed  in  this  paper  is  to  prove  that  the  region\ninference  system  is  safe,  in  particular,  that  de-allocation  really  is  safe,  when  the\nanalysis claims that it is safe.\nWe  do  this  as  follows.  We  first  define  a  standard  operational  semantics  for  our\nskeletal source language,  giving both a static and a dynamic semantics (Section 3).\nWe  then  define  a  region-based  operational  semantics  for  a  target  language;  the\ntarget language is identical to the source language, except that programs have been\nannotated  with  region  information  (Section 4).  In  the  dynamic  semantics  of  the\nsource  language,  there  is  no  notion  of  store;  in  the  target  language  semantics,\nhowever,  there is  a store  which  is organised  as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . 
Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3601 Signs:  3242 . Length:   52 pic 10 pts,   222 mm\nthe translation from source language to target language in the form of an inference\nsystem  (Section 5).  We  then  define  a  representation  relation  between  values  in  a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas  the  rest  of  the  computation  (after  the  evaluation  ofe)  is  concerned,eand  its\nimage  in  the  target  program  evaluate  to  related  values,  when  evaluated  in  related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused  in  the  region-based  semantics.  The  key  point  is  that  on  that  part  of  target\nmachine  which  can  be  observed  by  the  rest  of  the  computation,  every  value  used\nin the source  language is  faithfully represented by a value in the target language.\nThis  representation  relation  is  defined  as  the  maximal  fixed  point  of  a  certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we  callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however,  we\nshall  give  some  hints  about  how  the  region  inference  rules  we  present  can  be\nimplemented (Section 10).\n2.  RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First,  when a value is created in our\nscheme,  it  is  not  necessarily  put  into  the  topmost  region.  
In  the  case  of  function\nclosures,  for  example,  the  closure  is  put  as  far  down  the  stack  as  is  necessary  in\norder to  be  sure  that  the closure will  still exist  should  it ever  be accessed. Second,\nnot  all  regions  have  a  size  which  can  be  determined  at  the  time  the  region  is\nallocated.   Finally,   the   scheme   works   for   higher-order   functions   and   recursive\ndatatypes  and  allocation  is  based  on  the  basis  of  the  type  system  of the  language,\nnot the grammar.\nRuggieri  and  Murtagh  [22]  propose  a  stack  of  regions  in  conjunction  with  a\ntraditional  heap.  Each  region  is  associated  with  an  activation  record  (this  is  not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue  and  Yagi  [13]  present  an  interesting  technique  for  compile-time  analysis\nof   runtime   garbage   cells   in   lists.   Their   method   inserts   pairs   of   HOLD   and\nRECLAIM'instructions  in  the  target  language.  HOLD  holds  on  to  a  pointer,p\nsay,  to  the  root  cell  of  its  argument  and  RECLAIM'collects  those  cells  that  are\nreachable  frompand  fit  the  path  description'.  HOLD  and  RECLAIM  pairs  are\nnested,  so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions.  In  our  scheme,  however,  the  unit  of collection is  one entire  region,  i.e.,\nthere  is  no  traversal  of  values  in  connection  with  region  collection.  The  path\ndescriptions   of   Inoue   and   Yagi   make   it   possible   to   distinguish   between   the\n114\nTOFTE  AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3486 Signs:  2644 . 
Length:   52 pic 10 pts,   222 mm\nindividual members of a list. This is not possible in our scheme,  as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor  garbagelistcells  produced  by  Quicksort  [13, p. 575].  We  obtain  a  1000\nreclamation rate  (but  for  1  word)  forallgarbage  produced  by Quicksort,  without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al.  [27]  use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe  used  more  than  once.  These  analyses  provide  somewhat  different  information\nfrom ours:  we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin  so-called  simple  form  together  with  a transformation  of  expressions  into  simple\nform.  The  transformation  can  result  in  an  increase  in  the  number  of  evaluation\nsteps   by   an   arbitrarily   large   factor   [10, p. 618].   Georgeff   also   presents   an\nimplementation scheme which does not  involve translation,  although this  relies on\nnot using call-by-value  reduction,  when actual parameters are functions.\nThe  device  we  use  for  grouping  values  according  to  regions  is  unification  of\nregion variables,  using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety,  however,  nor  does he deal with polymorphism.\nTo  obtain  good  separation  of  lifetimes,  we  useexplicit  region  polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. 
For\nexample,  a  declaration  of  the  successor  functionfunsucc(x)=x+1  is  compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote   thatsucchas   been  decorated   with   two  extra   formal  region   parameters\n(enclosed  in  square  brackets  to  distinguish  them  from  value  variables  such  asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning  that,  for any\\and\\$,  the function  accepts an integer  at\\and  produces\nan  integer  at\\$  (performing  agetoperation  on  region\\and  aputoperation  on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5  at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which  are different  from  its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3724 Signs:  3055 . Length:   52 pic 10 pts,   222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation  records of the classical  stack discipline.\nWe  use  ideas  from  effect  inference  [12, 16, 17]  to  find  out  where  to  wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying  region-based  store  model,  irrespective  of  whether  these  effects  stem  from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. 
Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin  [23] and Talpin\nand Jouvelot  [24]  present  a  polymorphic  effect system  with  (side-)  effect masking\nand prove that it is sound,  with respect to a store semantics where regions are not\nreused.\nThe  first  version  of  the  proof  of  the  present  paper  was  recorded  in  a  technical\nreport  [25],  which  in  turn  was  used  as  the  basis  for the proof  outline  in  [26]. In\norder  to  simplify  the  proofs,  several  modifications  to  the  early  proofs  have  been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism,  resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not  required under the value  restriction;  (b)  the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency  has been  strengthened  to prevent closures  with free region variables\n(these   used   to   complicate   the   proof)   (d) the   proofs   have   been   rewritten   and\nreorganised around  the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto  delay  the  allocation  of  regions  and  to  promote  the  de-allocation,  sometimes\nleading  to  asymptotic  improvements  in  space  usage  and  never  leading  to  worse\nresults than region inference without their analysis added.\n3.  THE  SOURCE LANGUAGE,  SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. 
The grammar  for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples   to   the   theory   is   straightforward.   (References,   exceptions,   and   recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial  extensions  of  Standard  ML  have  not  been  studied.  Nor  is  it  clear\n116\nTOFTE  AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3623 Signs:  2786 . Length:   52 pic 10 pts,   222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1.  Notation\nIn  the  rest  of  this  paper  we  shall  use  the  following  terminology.  Afinitemap  is\na map  with finite domain.  Given setsAandB,  the set  of finite maps  fromAtoB\nis denotedAw\u0014\nfin\nB. The domain  and range of a finite mapfare denoted Dom(f)\nand  Rng(f),  respectively.  Whenfandgare  finite  maps,f+gis  the  finite  map\nwhose  domain  is  Dom(f)_Dom(g)  and  whose  value  isg(x),  ifx# Dom(g),  and\nf(x)  otherwise. For any mapfand setA,  we writefaAto mean the restriction of\nftoA.  We  sometimes  write  a  tuple  of  region  variables,  for  example,  in  the  form\n\\\n1\n}}}\\\nk\n,  i.e,  without parentheses and commas.\nWe  often  need  to  select  components  of  tuples\u0015\u0015for  example,  the  region name of\nan address. In  such cases,  we rely on variable names to indicate which component\nis being selected. For  example,  ``rofa''  means ``the region name component ofa''.\n(As  we  shall  see,  an  address  is  a  pair  of  the  form  (r,o),  whereris  a  region  name\nandois an offset.)\n3.2.  
Static Semantics  for Source\nFollowing  Damas  and  Milner  (1982),  we  haveML  typesandML  type  schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif  there  exist{\nML\n1\n, ...,{\nML\nn\nsuch  that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML  type\nenvironmentis  a  finite  map  from  program  variables  to  ML  type  schemes.  We  use\nTE\nML\nto  range  over  type  environments.  Whenois  an  ML  type,  type  scheme,  or\ntype environment,  ftv(o)  denotes  the set of type variables that occur free  ino.\nIn Milner's original type discipline,  polymorphism is associated withlet. It has\nturned  out  that  there  are  advantages  to  restricting  polymorphism  so  that  inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage,  a  syntactic  value  is  an  integer  constant  or  a  lambda  abstraction.)  This\nrestriction  is  known  as  thevalue  restriction.  Besides  making  it  easier  to  prove\nsoundness  in  connection  with  references  and  other  language  extensions,  imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave  done  both).  In  fact,  we  shall  take  the  restriction  one  step  further,  and  only\nallow polymorphism  in connection  withletrec. Any program which satisfies the\nvalue   restriction   can   be   turned   into   an   equivalent   program   which   only   has\nletrec-polymorphism,   by   simply   turning   everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$  andzare  fresh  variables.  In  the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. 
Page 01:01\nCodes:  2876 Signs:  1421 . Length:   52 pic 10 pts,   222 mm\ntheory  that  follows  we  therefore  only  have  polymorphism  in  connection  with\nletrec.  With  this  convention,letx=e\n1\nine\n2\nendis  just  syntactic  sugar  for\n(*x.e\n2\n)(e\n1\n).  We  show  the  rules  forleteven  so,  to  make  it  easier  to  follow  the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3.  Dynamic Semantics for Source\nAnon-recursive  closureis  a  triple(x,e,E),  whereEis  anenvironment,  i.e.,  a\nfinite map  from variables  to values.  We useEto  range  over environments; the  set\nof  environments  is  denoted  Env.  Arecursive  closuretakes  the  form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant  or  a  closure.  We  usevto  range  over  values;  the  set  of  values  is  denoted\nVal.\nEvaluation  rules  appear  below.  They  allow  one  to  infer  statements  of  the  form\nE|&e\u0014v,  read:in  environment  E  the  expression  e  evaluates  to  value  v.  
A  closure\nrepresenting a recursive  function  is ``unrolled'' just  before it is  applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f)   E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE  AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3488 Signs:  2051 . Length:   52 pic 10 pts,   222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4.  THE  TARGET LANGUAGE,  TExp\nWe  assume  a  denumerably  infinite  set  RegVar=[\\\n1\n,\\\n2\n, ...]ofregion  variables;\nwe  use\\to  range  over  region  variables.  The  grammar  for  the  target  language,\nTExp,  is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence,  theat\\qualifies*x.e,  note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. 
A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it  to actual  arguments\\$\n1\n, ...,\\$\nk\n,  and  creates a function  closure in\\$.\nFor  any  finite  set[\\\n1\n, ...,\\\nk\n]of  region  variables  (k\u001e0),  we  writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na  semantics  can  be  extracted  from  the  translation  rules  in  Section 5.  We  thus\nproceed to the dynamic semantics.\n4.1.  Dynamic Semantics  for Target\nAssume  a  denumerably  infinite  set RegName=[r1,r2, ...]ofregion  names;we\nuserto  range  over  region  names.  Region  names  serve  to  identify  regions  at  run-\ntime.  Further,  assume  a  denumerable  infinite  set,  OffSet,  ofoffsets;  we  useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan  integer  constant,  a  function  closure,  or  a  region  function  closure.  We  usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes:  3926 Signs:  3414 . Length:   52 pic 10 pts,   222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto  range  over  region  environments;  the  set  of  region  environments  is  denoted\nRegEnv.  Afunction  closureis  a  quadruple(x,e$,VE,R),  wherexis  a  program\nvariable,e$  is  a  target  language  expression,  andVEandRgive  meaning  to  the\nfree  program  and  region  variables  of*x.e$.  Aregion  function  closureis  a  tuple\nof  the  form(\\\n1\n}}}\\\nk\n,x,e,VE,R).  
Region  function  closures  represent  region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as the formal parameters of the region function closure.\nAn address is a pair (r,o) of a region name and an offset. We use a to range over\naddresses  and  Addr  to  denote  the  set  of  addresses.  For  any  address a,  we  write r\nof a to mean the first component (i.e., the region name) of a. A store is a finite map\nfrom  region  names  to  regions.  We  use s to  range  over  stores;  the  set  of  stores  is\ndenoted Store.\nA value is an address. We use v to range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenever a=(r,o) is an address, we\nwrite s(a)  to  mean s(r)(o).  Similarly,  we  write s+[(r,o)[sv] as  a shorthand  for\ns+[r[(s(r)+[o[sv])].  Moreover,  we  define  the planar  domain  of  s,  written\nPdom(s),  to  be  the  finite  set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))].  Finally,\nwe write ``s\"\"[r]''  (read:s  without r)  to mean the storesa(Dom(s)\"[r]).\nThe  inference  rules  for  the  dynamic  semantics  of  TExp  are  shown  below.  They\nallow  one  to  infer  sentences  of  the  form s,VE,R|&e$\u0014v$,s$,  read: In  store  s,\nvariable environment VE, and region environment R, the target expression e$ evaluates\nto value v$ and (a perhaps  modified) store s$.\nRule 10  is  the  evaluation  rule  for  application  of  a  region function  closure.  A  func-\ntion  closure  is  created  from  the  region  closure.  One  can  imagine  that  a  runtime-\nerror occurs if the premises cannot be satisfied (for example,  because\\$\ni\n\u0012Dom(R),\nfor some \\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs  that result  from  the translation.\nRule  14  concerns  region-polymorphic  and  (possibly)  recursive  functions.  
For\nreasons  explained  in  Section 5.2,  we  have  chosen  to  combine  the  introduction  of\nrecursion  and  region  polymorphism  in  one  language  construct.  Functions  defined\nwithletrecneed not be  recursive,  so  one  can also use theletrecconstruct to\ndefine  region  functions  that  produce  non-recursive  functions.  Rule  14  creates  a\nregion  closure  in  the  store  and  handles  recursion  by  creating  a  cycle  in  the  store:\nfirst  a  ``fresh  address''  is  chosen  (by  side-conditionsr=R(\\),o\u0012Dom(s(r));  the\nenvironmentVE$=VE+[f[(r,o)]is   stored   in   the   region   function   closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R),  which  in  turn  is  stored  in  the  fresh  address  chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself,  by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication,  the  operator  expression  will  evaluate  to  a  pointer  to  an  ordinary\nfunction   closure(x,e,VE\n0\n,R\n0\n),   even   if   the   operator   expression   is   of   the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\.  Consequently,   a  single  rule  for  function  application\nsuffices.\nFinally,  the pushing and  popping of  the region stack  is seen in Rule 15.\n120\nTOFTE  AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2895 Signs:  1367 . 
Length:   52 pic 10 pts,   222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2.  Example:  Function Values\nLet  us  consider  the  evaluation  of the expressione$ from  Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur  free  ine$,  they  must  be  allocated  before  the  evaluation  ofe$  begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas  been  allocated,  (b) just  before  the  closure  is  applied,  and  (c)  at  the  end;  we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice  the dangling,  but harmless,  pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes:  2292 Signs:  1335 . 
Length:   52 pic 10 pts,   222 mm\n4.3.  Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=if x=0 then 1\nelse if x=1 then 1\nelse fib(x-2)+fib(x-1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthe fib function  takes  two  arguments,  namely \\\n3\n,  which  is  the  region  where x is\nlocated, and \\\n4\n,  which is the place where fib is supposed to put its result. Due to\nthe presence of polymorphic recursion in the region inference system,  the recursive\ncalls  of fib use  regions different from \\\n3\nand \\\n4\n(and  the  two  recursive  calls  use\nseparate regions). For example, the first call first reserves space for the result of the\ncall  (\\\n5\n),  then  reserves  space  for  the  actual  argument  (\\\n8\n),  then  creates  the actual\nargument,  performs  the  call,  de-allocates  the  actual  argument,  and  uses the result,\ntill it can be discarded  (after the +).\nThe letrec stores  the  following  cyclic  region  function  closure  in  the  store  at\nsome new  address, a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that \\\n13\nis bound to r\n3\n, the application of fib to 15 near the end of the\nprogram stores the following function  closure in the region denoted by \\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE  AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2129 Signs:  1556 . Length:   52 pic 10 pts,   222 mm\nFIG.  2. The Fibonacci  function annotated with regions.  The result  will be a single integer  in \\\n1\n.\nWe  see  that  region  inference  has  produced  allocations  and  de-allocations  very\nsimilar  to  those  of  a  traditional  stack-based  implementation.  
Indeed,  the  maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion,  as it  would be  in a pure stack discipline.\n4.4.  Design Choices\nThe region-based semantics relies on a number of design choices,  some of which\nare crucial.\nFirst,  it  is  crucial  that  the  sets  RegName  and  OffSet  can  be  any  (denumerable)\nsets.  We  do  not  assume  that  these  sets  are  ordered  or  that  there  is  any  notion  of\naddress locality.  Thus no particular physical  implementation  of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used  in the ML Kit is described in  [5].\nSecond,  it  is  crucial  that  the  semantics  uses  so-called  ``flat  environments'';  the\nalternative (``linked environments'') is to represent the environment as a linked list\nof   environment   frames.   This   is   a   popular   representation   in   block-structured\nlanguages  and  in  some  functional  languages.  With  linked  environments,  closure\ncreation  is  cheap,  but  it  does  not  work  with  regions,  at  least  if  the  environment\nframes  are  interspersed  with  regions  on  one  stack!  In  Example 4.2,  it  is  essential\nthat  we  copy  the  environment  into  the  closure  for*y.(*1x,y)at\\\n1\nso  that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2855 . Length:   52 pic 10 pts,   222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed  (in  the ML Kit,  integers  and other values  that  fit in one  machine  word are\nrepresented  unboxed).  
Recursion  could  probably  have  been  implemented  using\nunfolding of closures rather than cycles in the store. Finally,  there is no deep need\nto  keep  the  region  environment  and  the  variable  environment  separate  in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5.  Properties  of Region-Based Evaluation\nWe  can  now  state  formally  that  the  complete  evaluation  of  an  expression  does\nnot  decrease  the  store.  For  arbitrary  finite  mapsf\n1\nandf\n2\n,  we  say  thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r),  for  allr# Dom(s\n1\n).\nLemma4.1.If  s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe  proof  is  a  straightforward  induction  on  the  depth  of  inference  ofs,VE,\nRE|&e\u0014v,s$.  The  formula  Dom(s)=Dom(s$)  in  Lemma 4.1  expresses  that  the\nstore  resulting  from  the  elaboration  has  neither  more  nor  fewer  regions  than  the\nstore  in   which   the   evaluation  begins,   although   other  regions  may  have  been\nallocated  temporarily  during  the  evaluation.  The  evaluation  ofemay  write  values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites  any  of the  values that are ins.\n4.6.  Syntactic Equality of Expressions\nLete$  be  a  target  expression.  The  set  of  program  variables  that  occur  free  ine$\nis written fpv(e$ ). The  set of region variables  that occur free ine$ is frv(e$).\nBoth  in  the  source  language  and  in  the  target  language,  we  shall  consider  two\nexpressions  equal,  if  they  can  be  obtained  from  each  other  by  renaming  of bound\nvariables. This extends to closures. 
For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare  considered  equal  ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare  equal  in  the  above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof  the  free  variables  of*x\n1\n.e\n1\n,  provided  of  course  that  the  corresponding  change\nhas  been  made  in  the  domain  ofVE\n1\nto  obtainVE\n2\n.  (Loosely  speaking,  this\ncorresponds to admitting value environments as declarations and then allowing the\nusual  renamings  permitted  in  an  expression  of  the  formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally,   we   consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal,   ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e).  This  allows  us  to  introduce  and  delete  unused  program  variables\nin the domains of environments inside closures.\nSimilarly,  for  any  region  closure(\\\u0011,x,e,VE,R)we  allow  the  renamings  of\n\\\u0011,x, fpv(e)  and  frv(e)  and  the  introduction  or  elimination  of  unused  program\n124\nTOFTE  AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2899 Signs:  1852 . Length:   52 pic 10 pts,   222 mm\nvariables  that  one  would  expect  if  the  closure  were  written  letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality  on  semantic  objects  in  each  of  the  two  dynamic  semantics  is  then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5.  REGION INFERENCE\nThe rules that  specify  which translations  are legal are called theregion inference\nrules. In  Section 5.1 we present  region types  and other semantic objects  that occur\nin  the  region  inference  rules;  the  rules  themselves  are  presented  in  Section 5.2.  In\nSections 5.3  and  5.4  we  state  and  prove  properties  of  the  region  inference  system;\nfor example,  that  the  translation  is a refinement of Milner's type discipline.\n5.1.  
Semantic Objects\nRegion  Types.    We  assume three denumerably infinite,  pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo  avoid  too  many  subscripts  and  primes,  we  use  bothp(for  ``place'')  and\\to\nrange over  region variables.  Anatomic  effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to  range  over  effects.  For  a  concrete  example,  the  effect  of  expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe  object=..is  called  anarrow  effect.  Formally,  an  arrow  effect  is  a  pair  of  an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas  the  effect  of  evaluating  the  body  off.  Effect  variables  are  useful  for  expressing\ndependencies between  effects. For  example,  the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3490 Signs:  2507 . Length:   52 pic 10 pts,   222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn  (17)  the  last  occurrence  of=\n1\nindicates  that  for  alle\n1\nande\n2\nof  the appropriate\ntype,  ife\n1\nevaluates  to  some  function,g,  ande\n2\nevaluates  to  some  value,v,  then\nthe  evaluation  of  (e$e\n1\n)e\n2\nmay  involve  an  application  ofg.  
(As  it  happens,  the\nevaluation  would   indeed   involve   an   application   ofg,   but   the   type   does   not\nexpress that.)\nEquality  of  types  is  defined by term  equality,  as  usual,  but  up to  set equality of\nlatent  effects.  For  example,  the  arrow  effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne  might  wonder  why  we  have  a  pair=..on  the  function  arrow  rather  than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows  pose  a  problem  for  the  existence  of  principal  unifiers.  A  solution  is  to  use\narrow effects  together  with certain invariants  about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  formed  by  the  inference  algorithm  and=\n1\n==\n2\nthen  it  will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur  in  a  proof  tree  and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n:  the  presence  of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes  the  entire  effect.\n1\nwhich=\n1\nstands  for.  With  these  repre-\nsentation  invariants  and  using  the  special  notion  of  substitution  defined  below,\none  can  prove  the  existence  of  principal  unifiers,  even  though  types  ``contain''\neffects   (which   are   sets).   A   detailed   account   of   how   this   is   done   is   beyond\nthe  scope  of  this  paper.  Also,  the  invariants  mentioned  above  are  not  needed  for\nproving  the  soundness  of  region  inference,  so  we  shall  not  consider  them  in  what\nfollows.\nSubstitution.Atype  substitutionis  a  map  from  type  variables  to  types;  we  use\nS\nt\nto  range  over  type  substitutions.  
Aregion  substitutionis  a  map  from  region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect  substitutions.  Asubstitutionis  a  triple  (S\nt\n,S\nr\n,S\ne\n);  we  useSto  range  over\nsubstitutions.  Substitution  on  types,  region  variables,   and  effects  is  defined  as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE  AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3541 Signs:  1727 . Length:   52 pic 10 pts,   222 mm\nTypes and Region  Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete  example,  consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n),  where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). 
Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one  will  then be  able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn  applying  substitutions  to  semantic  objects  with  bound  names  (e.g.,  a  type\nscheme)   bound  variables  are  first  renamed  to  avoid  capture,   when  necessary.\nSubstitutions compose; Id  is the identity substitution.\nThesupportof  a  type  substitutionS\nt\n,  written  Supp(S\nt\n),  is  the  set[:# TyVar  |\nS\nt\n(:){:].  Similarly  for  region  substitutions.  Thesupportof  an  effect  substitution\nS\ne\n,  written Supp(S\ne\n),  is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n),   written  Supp(S),  is  defined  as  Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe  liberty  of  considering  the  triple  (S\nt\n,S\nr\n,S\ne\n)  a  substitution,  without  explicitly\nextending the finite maps  to total maps.\nType  Schemes.    Type  schemes  resemble  the type  schemes  of Damas  and Milner\n[7]  but  with additional  quantification  over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2548 Signs:  1879 . Length:   52 pic 10 pts,   222 mm\nwheren\u001e0,k\u001e0  andm\u001e0.  
The  following  definitions  are  stated  for  compound\ntype  schemes  but  are  easily  extended  to  simple  type  schemes.  For  a  type  scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n,  the bound variables of _,  written bv(_),  are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes are equivalent if they can be obtained from each other by\nrenaming  and  reordering  of  bound  variables.  A type {$ is an instance of  _,  written\n_\u001e{$,  if  there  exists  a  substitution S such  that  Supp(S) \u001fbv(_)  and S({)={$.\nWhen we want to make S explicit, we say that {$ is an instance of _ via S, written\n_\u001e{$ via  S. Equivalent type schemes  have the same instances.\nWe  sometimes  write { as  a  shorthand  for  the  simple  type  scheme \\().{,  not  to\nbe confused with the  compound  type scheme \\().{\n\u0014\n,  since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions,  even  for  those  region-polymorphic  functions  that  take  an  empty  list  of\nactual  region  parameters.  The  underlining  serves  to  make  it  clear  whether  a  type\nscheme is  to be  regarded as simple or compound.\nA type  environment is  a  finite  map  from  program  variables  to  pairs  of  the  form\n(_,\\). We  use TE to range over type environments.\nThe semantic objects are summarised in Fig. 3. The notion of free variables extends\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free in TE if it occurs free in TE(x), for some x.) 
For any semantic\nobjectA,  frv(A)  denotes  the  set  of  region  variables  that  occur  free  inA;  ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG.  3.    Semantic objects of region inference.\n128TOFTE  AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3454 Signs:  1626 . Length:   52 pic 10 pts,   222 mm\n5.2.  The Inference System\nThe inference rules  allow  the inference of  statements of the form\nTE|&eOe$:+,.\nread:in  TE,e  translates  to  e$,which  has type  and  place + and  effect ..  The region\ninference rules are non-deterministic: givenTEande,  there may be infinitely many\ne$,+,  and.satisfyingTE|&eOe$:+,..  This  non-determinism  is  convenient  to\nexpress type-polymorphism,  but  we  also use  it to  express freedom  in the  choice of\nregion variables. Indeed,  the region inference rules allow one to put all values in a\nsingle region,  although,  in  practice,  this would be  the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) 
\u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn  Rule 21,  note  that  the  effect  of  referring  toxis  empty;  this  is  because  the\neffects only  relate  to  access  of the region stores,  not the environmentsVEandR.\nIn  Rule 22  the   instances  of  the  bound  region  variables  become  actual  region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3655 Signs:  2838 . Length:   52 pic 10 pts,   222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn  Rule 23,  the  effect  of  creating  the  function  closure  at  region\\is  simply\n[put(\\)].  Following  Talpin  and  Jouvelot  [24],  one  is  allowed  to  make  the  infor-\nmation about the function  less precise by increasing the latent effect. This is useful\nin  cases  where  two  expressions  must  have  the  same  functional  type  (including  the\nlatent effects on the arrows) but may evaluate to different closures. 
The freedom to\nincrease  effects  is  also  useful  when  one  wants  to  prove  that  every  well-typed  Exp-\nprogram  of  Milner  [18]  can  be  translated  with  the  region  inference  rules\u0015\u0015see\nLemma 5.2  below.  We  shall  explain  the  side-condition  frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn  Rule 24  we  see  that  the  latent  effect  is  brought  out  when  the  function  is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order  to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe  absence  of  bound  region  variables  is  due  to  the  fact  that  introducing  bound\nregion variables  (and  hence delaying the evaluation ofe$\n1\n)  may  change  the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally,  one could\nallow  quantification  of  effect  variables  in  Rule 25,  as  indeed  we  did  in  [25],  but\neffect  quantification  in  simple  type  schemes  appears  to  be  of  limited  practical  use\nand  it  complicates  the  proof  of  Lemma 8.3  below  considerably  [25],  so  we  have\nabandoned it.\nIn  Rule 26,  note  thatfis  region-polymorphic,  but  not  type-polymorphic,  inside\ne\n1\n,  its  own  body.  Ine\n2\n,  however,fis  polymorphic  in  types,  regions  and  effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27  concerns  the  introduction  ofletregionexpressions.  The  basic  idea,\nwhich   goes   back   to   early   work   on   effect   systems   [17],   is   this.   
Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion  ofe$).Then  \\  is  purely  local  to  the  evaluation  of  e$,in  the  sense  that  the  rest\nof the computation will not access any value stored in \\.\nExample.    Once  again,  consider  the  expressione$  from  Section 1.  Lete$\n0\nbe  the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe  type  environment  in  force  when  this  expression  is  produced  isTE\n0\n=[];  the\ntype and place  ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE  AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3741 Signs:  2780 . Length:   52 pic 10 pts,   222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly  region variable  which  occurs free  in.\n0\nbut  occurs  free  neither  inTE\n0\nnor in\n+\n0\n.  Rule 27  allows  us  to  discharge\\\n6\n,  resulting  in  the  effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext,  Rule 28  allows  one  to  discharge  an  effect  variable  from  the  effect  of  an\nexpression;  noletregionis  introduced,  since  the  discharge  does  not  influence\nevaluation.\nWe  owe  the  reader  an  explanation  for  the  side-condition  frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated  expression  occurs  free  either  in  the  type  or  in  the  effect  of  the  expression.\nHowever,  here  is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)].  
Here  we  see  that\\\n3\nis  free  in  the\ntarget expression  but  occurs free  neither in  the effect nor in  the resulting  type  and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining  free   region  variables   which  only   occur  in  dead  code.  Such  region\nvariables  complicate  arguments  about  renaming  of  region  variables,  specifically\nthey  complicate the proof  of Lemma 8.3,  if  allowed. We  therefore impose  the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying  Rule 27  to the  function body,  just before applying Rule 23,\nfor in Rule 27  there is  no requirement that\\must occur free  in..\nAs  mentioned  earlier,  the  region  inference  rules  give  rise  to  a  static  semantics\nfor   the   target   language:   one   just   consistently   replaces   sentences   of   the   form\nTE|&eOe$:+,.byTE|&e$:+,..  However,  we  prefer  the  present  formulation,\nwhich emphasises that the rules specify a translation.\n5.3.  Region  Inference Is  a Refinement  of  Milner's Type System\nIn  this  section  we  prove  that  the  region  inference  system  is  a  refinement  of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular,  this shows that the problem of determining\nwhether a closed expression  can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. 
To this\nend,  we define a function,?,  (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3850 Signs:  2390 . Length:   52 pic 10 pts,   222 mm\nThe proof is  a straightforward  induction  on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe  some  fixed  effect  variable.  The  basic  idea  is  to  choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere  we  need  an  arrow  effect  in  the  translation.  Unfortunately,  we  cannot\nsimply  makeRa  map,  because  of  the  distinction  between  simple  and  compound\ntype schemes.  So we  defineRinductively as follows:\n:R:intRint\n{R+   {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly,  for  everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By  induction  on  the  depth  of  inference ofTE|&e:{. We  show only two\ncases,  as the rest are straightforward.\n[e#x].By  assumption  we  haveTE(x)=_and_\u001e{.  SinceTE R TE$we\nthen  haveTE$(x)=(_$,\\\n0\n)  for  some_$  which  satisfies_R_$.  
Now_$  may  be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n)  be  the  unique  type  with  place  satisfying{R+.  Then_$\u001e{$  and  the\ndesired conclusion follows either by Rule 21  or by Rule 22.\n[e#*x.e\n1\n].    Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen  inferred  from  the  premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n.  We  have  (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n],  frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].  Now  Rule 23  con-\nveniently   allows   us   to   use   this   inclusion   to   proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4.  Substitution Lemma\nLemma5.3.For   all   substitutions   S,if   TE|&eOe$:+,.   then   S(TE)|&eO\nS(e$):S(+),S(.).\nThe   proof   is   a   straightforward   induction   on   the   depth   of  the   inference  of\nTE|&eOe$:+,.,  using  appropriate variants  ofSin the case forletrec.\nNext,  we  shall  state  a  lemma  to  the  effect  that  the  operation  of  making  type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE  AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3414 Signs:  2513 . Length:   52 pic 10 pts,   222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n,  written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare  identical,  or_\n1\nand_\n2\nare  both  compound\nand_\n1\n=\\:\u0011._\n2\n,  for  some:\u0011.  
Furthermore,  we  writeTE\n1\nc\n=\nTE\n2\nif  Dom(TE\n1\n)=\nDom(TE\n2\n)  and,  for  allx# Dom(TE\n1\n),  if  (_\n1\n,\\\n1\n)=TE\n1\n(x)  and  (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If  TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,..  We  note,   however,   that  the  similar  statement  concerning\nregion  polymorphism  (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n)  is  not  true,  because\napplications  of  region  functions  in  the  target  expression  can  be  affected  by such  a\nchange.\nFortunately,  it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6.  USING  EFFECTS TO DESCRIBE CONTINUATIONS\nFor  the  proof  of  the  soundness  of  the  translation  scheme,  we  need  to  relate  the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as  theconsistencyrelation.\nSince  all  values  are  addresses  in  the  target  language  semantics,  the  consistency\nrelation  must  involve  stores.  Consistency  also  naturally  depends  on  types:  at  type\nint,  source  level  integers  can  only  be  consistent  with  pointers  to  integers  in  the\ntarget;  at  a  functional  type,  only  closures  can  be  related,  and  so  on.  The  region\ninference  rules  yield  expressions,  types  with  places,  and  effects\u0015\u0015all  of  which  can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime,  we need a region environment,R,\nand the following definition:\nDefinition6.1.  
Aregion  environment  Rconnects  effect.to  stores,  if  frv(.)\u001f\nDom(R)  and for all\\# frv(.),R(\\) # Dom(s).\nBased  on  these  considerations,  assume  that  we  have  defined  consistency  as  a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects  .  to  s  then  there  exists  a  store  s$and  a  target  value  v$such  that  s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3774 Signs:  3146 . Length:   52 pic 10 pts,   222 mm\nHowever,  there  is  a  problem  with  this  conjecture.  Informally,  it  states  that  con-\nsistency  is  preserved  by  evaluation.  Unfortunately,  we  cannot  expect that  to  hold!\nTo  see  what  the  problem  is,  consider  Example 4.2  once  more.  According  to  the\nconjecture,   at   point   (b)   we   should   have   that   the   source   language   closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na   sense   they   are   consistent:   application   of   the   two   closures   map   consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween  the  source  environment[x[(2, 3)]and  its  representation  in  the  target\nsemantics  was  partly  destroyed  when  the  regionr\n6\nwas  popped  from  the  region\nstack.  Thus  we  see  that,  intuitively  speaking,  consistency  gradually  deteriorates\nduring computation. 
The saving  factor,  it turns out,  is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo  make  these  intuitions  precise,  we  need  some  notion  of  ``consistency  with\nrespect  to  the  rest  of  the  computation.''  One  possibility  is  to  work  explicitly  with\ncontinuations   or   evaluation   contexts.   However,   we   have   not   explored   this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we  define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$),  also writtenC(R,+,v,s,v$) w.r.t..$,  is read:at type with\nplace +,in region environment R  and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation).  In  our  example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)],  connected  via  the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion  does  not  access  the  current  contents  ofr\n6\nis  evident  from  the  fact  that  no\nregion variable  free  in.$  is  connected tor\n6\n! That  is  why the  environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of  our conjecture becomes:\nConjecture6.2.    IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE)  w.r.t.\n(._.$)  andRconnects._.$tosthen  there  exist  a  stores$  and  a  target  value\nv$  such thats,VE,R|&e$\u0014v$,s$  andC(R,+,v,s$,v$) w.r.t..$.\nIn  other  words,  if  we  start  out  with  consistency  to  cover  both the evaluation of\ne$ (whose effect is.) 
and the rest of the computation (whose effect is.$) then after\nthe  computation  ofe$,  we  will  have  enough  consistency  left  for  the  rest  of  the\ncomputation.\nHowever,  Conjecture 6.2  is  not  quite  strong  enough  to  be  proved  by  induction.\nConsider  a  source  language  closure(x,e,E)and  a  target  closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE  AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2770 Signs:  1579 . Length:   52 pic 10 pts,   222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding   evaluation   in   the   target   language   takes   the   forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } .  However,  the  region  environment  in  whiche$  is  evaluated  is  not\nnecessarily  the  same  as  the  region  environmentR$  which  is  in  force  at  the  point\nwhere  the  application  takes  place,   for  more  regions  may  have  been  allocated\nsince  the  closure  was  created.  Moreover,R$  is  important  for  establishing  that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are  consistent,  sincev\n2\nandv$\n2\nwill  be  known  to\nbe  consistent  inR$,  not  inR.  And  we  must  establish  consistency  ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample.    Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. 
Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)],  which  is  the  net  effect  of  the  remainder  of\nthe computation at that point. (``Expect'' because we have not defined C yet.) Next,\nconsider  the  point  where  the  actual  argument  4  to f has  been  stored,  the  closure\nfor f has  been  fetched  and  we  are  just  about  to  evaluate  the  body  of f.  Now  the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3585 Signs:  2629 . Length:   52 pic 10 pts,   222 mm\nregion   environment   has   becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n],   the   store   has   become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can  reasonably  expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)],  i.e.,  the  effect  of  the  continuation  at  that\npoint. From  (29)  and  (30)  we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe  theorem  needs  to  be  strong  enough  to  handle  the  situation  that  the  region\nenvironment  in  which  consistency  is  established  is  not  the  same  as  the  region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an inner block can call a function\ndeclared  in  an  enclosing  block.  
(Indeed,  it  appears  that  although  the  variable\nenvironments do not  obey  a stack  discipline,  the region environments do.)\nWe  therefore  prove  that  the  theorem  holds  not  just  forRbut  also  for  other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2.    LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$  agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If    TE|&eOe$:+,.    andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v   and   R   connects   ._.$to   s   and   R$and   R   agree   on   ._.$and\nfrv(e$ )\u001fDomR$then   there   exist   s$and   v$such   that   s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote  that  we  use  the  effect  of  the  rest  of  the  computation  as  an  approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat  data  is  live,  if  it  is reachable  in the memory  graph)  is  incomparable: we  have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan  be  de-allocated  using  region  inference;  conversely,  sometimes  data  which  we\nkeep  alive  in  a  region  is  not  actually  used  by  the  rest  of  the  computation  and  a\ngarbage collector would  detect it.\n7.  CONSISTENCY\nFor  simplicity,  we  first  present  the  consistency  relation  in  the  form  of  inference\nrules  without  reference  to  the  underlying  mathematics.  We  shall  later  explain  that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator.  
For now,  it  suffices to  read the rules as  follows: the  conclusion  of a  rule\nholds if  and only if the premises hold.\n136\nTOFTE  AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3424 Signs:  2723 . Length:   52 pic 10 pts,   222 mm\nRules 31\u001535  characterize  consistency  between  source  values  and  storable  target\nvaluessv(defined  in  Section 4.1).  These  rules  are  used  in  Rules 36  and  37,  to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It  is  precisely in  Rules 36 and 37 that we see the significance of the\nidea  of  representing  the  rest  of  the computation  by the effect.:ifget(\\)\u0012.,  then\nany  claim  about  consistency  of  values  at  region\\is  allowed,  for\\then  denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored  at  addressv$  has  to  be  consistent  with  the  source  value,v,  as  described\nby   Rules 34   and   35.   (Recall   that   (r,o) # Pdom(s)   abbreviatesr# Dom(s)7\no# Dom(s(r)).)  Rule 38  says  that  consistency  of  environments  is  the  pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust  be  consistent  atTE.  The  region  environmentRmay  be  regarded  as  the\nregion  environment  which  is  in  force  when  the  closures  are  applied;  as  we  saw\nearlier,  this  is  not  necessarily  the  same  as  the  region  environment  which  was  in\nforce  when  the  target  closure  was  created  (R$  in  the  rule).  
For  the  purpose  of  the\nsoundness  theorem,  we  clearly  need  to  know  thatRandR$  are  related  somehow,\nand  it  turns  out  that  it  suffices  to  require  that  they  agree  on..  The  condition\nfrv(e$)\u001fDom(R$)  ensures  that  the  target  closure  contains  no  free  region  variables;  the\ntwo first  premises of  the rule already  ensure that fpv(e$ )\u001fDom(VE),  i.e.,  that  the\nclosure  contains  no  free  program  variables.  Again  this  is  good  hygiene,  which  is\nuseful in  the proofs  (specifically of Lemma 8.3).\nRule 33  is  similar  to  Rule 32,  but  deals  with  recursion.  For  the  premises  to  be\nsatisfied, TE must  have f in  its  domain.  Moreover,  since  recursion  is  handled  by\nunfolding  in  the  source  language  semantics,  it  isE+[f[(x,e,E,f)]andVE\nthat have to be  consistent,  rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures  at  compound  type  schemes.  For  simple  type  schemes,  one  uses  Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$)) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2940 Signs:  1754 . 
Length:   52 pic 10 pts,   222 mm\nType Schemes  and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C),  wherePmeans powerset  andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe  members  ofCare  referred  to  as  (consistency)claims.  We  use#to  range  over\nclaims  and1to  range  over  sets  of  claims.  For  example,  a  claim  of  the  form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource  valuevand  has  type  scheme_and  resides  at\\in  the  storesand  region\nenvironmentR,  with respect to effect..\nNote   that   (P(C), \u001f)   is   a   complete   lattice.   We   now   define   an   operator\nF:P(C)\u0014P(C).  The  definition  is  expressed  using  the  syntax  of  inference  rules,\nbut  it  could  equally  well  be  expressed  as  a  non-recursive  definition  by  cases;  for\ngiven1\u001fC,F(1)  is defined as the unique set[##C|##F(1) can be inferred by\none  of  the  inference rules]. 
Since  the rules  are very  similar  to  rules 31\u001538 we  shall\nnot explain them further.\n138\nTOFTE  AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2699 Signs:  1330 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes  and  Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis  monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus,  by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint  is  also  the  greatest  set1satisfying1\u001fF(1).  Let1\n*\nbe  this  greatest  fixed\npoint.\nDefinition7.1.    We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3395 Signs:  2587 . 
Length:   52 pic 10 pts,   222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of  claims is  consistent,  (i.e.,  that1\u001f1\n*\n)  it suffices to prove1\u001fF(1).\n8.  PROPERTIES  OF CONSISTENCY\nIn  this  section  we  prove  important  lemmas  about  the  consistency  relationC.\nBesides  being  useful  in  the  proof  of  the  main  theorem  (Theorem 6.1)  they  address\nissues  such  as  why  it  is  safe  to  re-use  a  de-allocated  region  even  when  there  are\ndead pointers  into it.  The lemmas will  be proved  using a special style of co-induc-\ntive proof,  which we call rule-based co-induction.\n8.1.  Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent  a  co-inductive  proof  in  a  form  which  resembles  ordinary  induction  on  depth\nof  inference.  The  scenario  is  that  a  set,C,  is  given,  together  with  an  operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na  finite  set  of  inference  rules  (in  our  case,  Rules  39\u001546).  Let1\n*\nbe  the  maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat,  for some  given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof  the  lemma.  Then  (47)  can  be  stated1\nR\n\u001f1\n*\n.  By  the  principle  of\nco-induction,  it  suffices to  prove1\nR\n\u001fF(1\nR\n),  i.e.,  that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules  that  defineF,using  only  premises  which  are  themselves  consequences  of  the\nlemma.  
Often,  this  is  proved  by  a  case  analysis  on#(note:  not#$ ),  since##1\n*\nimplies  that#can  be  inferred  by  an  application  of  one  of  the  rules  that  define F\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent   to   inferring#$#1\n*\n,   using   the   fixed-point   rules   forF(in   our   case:\nRules 31\u001538)  and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to minimal fixed\npoints,  using the fixed  point rules for F rather than the rules that define F.\nWe  name  this  style  of  co-inductive  proof rule-based  co-induction.  We  emphasise\nthat a rule-based co-inductive proof is not a proof on ``depth of inference''\u0015\u0015for the\nco-inductive  proof  establishes  claims  that  are  not  conclusions  of  any  finite  proof\ntree constructed  by the fixed point rules.\n140\nTOFTE  AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3101 Signs:  2084 . Length:   52 pic 10 pts,   222 mm\n8.2.  Preservation of Consistency\nThe  first  lemma  states  that  consistency  is  preserved  under  decreasing  effect  and\nincreasing  store.  
This  is  to  be  expected:  it  is  easier  to  obtain  consistency  with\nrespect  to  an  observer  if  the  observer  observes  a  little  rather  than  a  lot;  and  the\nlarger  the  store  is,  the  easier  it  is  for  it  to  contain  bits  of  target  values  which  are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ )  w.r.t..\n2\n.\nLemma 8.1 is  a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand  .\n2\n\u001f.\n1\nand  R\n2\nand  R\n1\nagree  on\n.\n2\nand  s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly  for\nthe other forms ofC.\nNotice  that  the  domain  ofs\n1\nneed  not  be  a  subset  of  the  domain  ofs\n2\nfor\nLemma 8.2  to  apply.  This  is  crucial  in  the  proof  of  the  main  theorem,  in  the  case\nforletregion.  Heres\n1\nwill  be  the  store  resulting  from  a  computation  which\ninvolves  local  regions;s\n2\nwill  be  the  result  of  removing  the  local  regions  froms\n1\n.\nThe region variables  that are free in.\n1\n, but not in.\n2\n,  will be the variables of the\nlocal regions.\nProof.We  prove  Lemma 8.2  and  the  corresponding  statements  concerning  the\nother  forms  of  consistency  by  rule-based  co-induction.  The  cases  for  the  inference\nrules  (31)  to  (38)  are  arranged  according  to  judgement  forms.  In  all  cases,  we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..]. 
Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks  in Section 8 it  suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe  inferred  using  Rules 31\u001538,  from  premises  which  are  themselves  conclusions  of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly  if  either  the ``premises''  (i.e.,  the  formulae  above  the  line)  of  Rule 31  hold,  or\nthe  premises  of  Rule 32  hold,  or  the  premises  of  Rule 33  hold.  We  deal  with  each\ncase in turn:\n[Rule 31].Here+=(int,\\),  for  some\\,  andv=sv=i,  for  somei# Int.  But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n,  by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3153 Signs:  1750 . Length:   52 pic 10 pts,   222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E),  andsv=(x,e$,VE,R$).  But  then,  by  (54),  (48)  and\n(49)  we have\nR$  andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus,  using  also  (48)  and  (53),  we  have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis  a  consequence  of  the  lemma.\n2\nThus  by  Rule 32  on  (52),  (55)  and  (56)  we  have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired  (since (56)  is a consequence of the lemma).\n[Rule 33].Similar  to the previous case.\nType   Schemes   and   Storable   Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. 
So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t.\n2\nis  a  consequence  of  the  lemma.  But  then,  by\nRule  35,  we  haveC(R\n2\n,(_,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  as  required  (since  the  premise\nused,  i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv)  w.r.t..\n2\n,  is a consequence of the lemma).\nType Schemes  and  Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule  36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n,  so  by  (36)  there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy  (49)  on (58)  we have\nR\n2\n(\\)=r(61)\n142\nTOFTE  AND TALPIN\n2\nStrictly  speaking,  we  should  say  ``we  have  that  the  claim  (R$,TE,E,s\n2\n,VE,.\n2\n)  is  a  consequence\nof the  lemma'',  but  the chosen  formulation  seems easier  to read,  so we  adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3240 Signs:  2227 . Length:   52 pic 10 pts,   222 mm\nThus (59)  and  (50)  give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy   (60),   (48),   (49)   and   (50)   we   have   that   the   claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis  a consequence  of the lemma; i.e.,  by (62),  that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis   a   consequence   of   the   lemma.   Thus   Rule 36   on   (61),   (62),   and   (63)   gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\n,  since  the  premise  used  is  a  consequences  of  the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ )  w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE)  w.r.t..].The  case  for  Rule  38  is  straight-\nforward.\n8.3.  
Region Renaming\nIn  order  to  prove  that  re-use  of  old  regions  is  safe  (Lemma 8.4),  we  shall  want\nto  rename  region  variables  that  occur  free  in  some  semantic  objectAbut  do  not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n,  written  Yield(S\nr\n),  is the  set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1.    LetAbe   a   semantic   object,   let.be   an   effect,   and   let\nS=(S\nt\n,S\nr\n,S\ne\n)  be  a  substitution.  We  say  thatSisaregion  renaming  ofAwith\nrespect   to.ifSafrv(A)   is   injective,   (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=<and\nSupp(S\ne\n) =Supp(S\nt\n)=<.\nIt is not in general the case thatC(R,+,v,s,v$ ) w.r.t..impliesC(R,S(+),v,s,v$)\nw.r.t..,  for  all  substitutionsS;  the reason  is  thatSmight map region  variables in\nthe  set  frv(+)\"frv(.)  to  variables  that  are  free  in.,  thereby  making  consistency\nharder to  achieve. However,  the following special case holds:\nLemma8.3.IfC(R,+,v,s,v$ )  w.r.t..  and  S  is  a  region  renaming  of  +  with\nrespect  to  .  thenC(R,S(+),v,s,v$ )  w.r.t...Similarly  for  the  other  consistency\njudgement forms.\nIntuitively:  as  far  as.is  concerned,  a  region  variable\\# frv(+)\"frv(.)  denotes\na garbage  region which  is  no different  from any  other garbage region!\nProof.By  rule-based  co-induction  onC(R,+,v,s,v$ ) w.r.t..(and  the  other\nconsistency judgement forms). The cases are ordered according to judgement forms.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].Assume thatSis a region\nrenaming of+with respect  to.and that\nC(R,+,v,s,sv) w.r.t...(64)\nNow  (64)  must be the conclusion of one of the following rules:\n143\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261336 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3591 Signs:  2402 . 
Length:   52 pic 10 pts,   222 mm\n[Rule 31].By  (64)  we  have+=(int,\\), for  some\\,  andv=sv# Int.  Thus\nC(R,S(+),v,s,sv)  w.r.t...\n[Rule 32].By  (64) there existTE,x,e,e$,R$,E,{,\\andVEsuch that\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](65)\nC(R$,TE,E$,s,VE) w.r.t..(66)\nR$ andRagree  on.frv(e$ )\u001fDom(R$)(67)\n+=({,\\),v=(x,e,E),sv=(x,e$,VE,R$),(68)\nwhereE$=E.  (The  reason  for  introducingE$  will  become  clear  later.)  To  prove\nC(R,S(+),v,s,sv)  w.r.t..we wish to  findTE\n0\n,R\n0\n,  ande$\n0\nsatisfying\nTE\n0\n|&*x.eO*x.e$\n0\natS(\\):S({,\\),[put(S(\\))](69)\nC(R\n0\n,TE\n0\n,E$,s,VE) w.r.t..(70)\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)(71)\nsv=(x,e$\n0\n,VE,R\n0\n)(72)\nand that  the  claim  (70)  is  itself  a  consequence  of  the  lemma.  Comparing  (65)  and\n(69),  a  tempting  idea  is  simply  to  applySthroughout  (65),  takinge$\n0\nto  beS(e$).\nHowever,Sis  not  necessarily  a  region  renaming  onTE,   so  (70)  would  not\nnecessarily be  a consequence of the lemma.\nTherefore,  let[\\\n1\n, ...,\\\nn\n]=frv(TE)\"frv(+,.)  and let[\\$\n1\n, ...,\\$\nn\n]be distinct new\nregion   variables,   new   in   the   sense   that[\\$\n1\n, ...,\\$\nn\n]&frv(S(+),.)=<.   Let\nS$=S+[\\\ni\n[\\\ni\n$|1\u001di\u001dn],  letTE\n0\n=S$(TE),  and  lete$\n0\n=S$(e$ ).  ThenS$isa\nregion renaming of (TE,+) with respect to.. Further,R\n0\nis defined as follows. Let\nDom(R\n0\n)  be  frv(e$\n0\n).  Since  (65)  must  have  been  inferred  by  Rule 23,  we  have\nfrv(e$ )\u001ffrv(TE,{).  ThusS$  is  injective  on  frv(e$ ).  Then  for  every  region  variable\n\\$ # frv(e$\n0\n)  there  exists  one  and  only  one  region  variable\\# frv(e$ )  such  that\nS$(\\)=\\$.  DefineR\n0\n(\\$ )  to  beR$(\\).  By  these  definitions,(x,e$,VE,R$)and\n(x,e$\n0\n,VE,R\n0\n)are equal. By Lemma 5.3 on (65) and the fact thatS$({,\\)=S({,\\)\nwe  obtain  (69),  as  desired.  
Notice thatR\n0\nandR$  agree  on.,  sinceS$ is  a region\nrenaming with respect to.. Thus (71) also holds. Then,  by Lemma 8.2 on (66) we\nhaveC(R\n0\n,TE,E$,s,VE) w.r.t...  But  then,  sinceS$  is  a  region  renaming  ofTE\nwith respect to.we have that the claim (70) is itself a consequence of the lemma,\nas desired. Finally Rule 32 on (68)\u0015(72) givesC(R,S(+),v,s,sv) w.r.t.., as desired.\n[Rule 33].Almost identical to the previous case: useE$=E+[f[(x,e,E,f)]\nandv=(x,e,E,f)instead  ofE$=Eandv=(x,e,E).  Conclude  using  Rule 33\ninstead of using Rule 32.\nType  Schemes  and  Storable  Values[C(R,(_,\\),v,s,sv) w.r.t..].    Assume  that\n(_$,\\$)=S(_,\\),  thatSis a region renaming of  (_,\\)  with respect to.,  and that\nC(R,(_,\\),v,s,sv) w.r.t...(73)\nThen (73)  is the conclusion of one of the following rules:\n144\nTOFTE  AND TALPIN\n\nFile: 643J261337 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3171 Signs:  1660 . Length:   52 pic 10 pts,   222 mm\n[Rule  34].Then  there  existTE,f,x,e,e$,\\\n1\n}}}\\\nk\n,:\n1\n}}}:\nn\n,=\n1\n}}}=\nm\n,{,VE\nandR$ such that\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)](74)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nand    bv(_)&fv(TE,\\)=<\nR$  andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n](75)\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..(76)\nv=(x,e,E,f),sv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$).(77)\nAs   in   the   previous   two   cases,Sis   not   necessarily   a   region   renaming   of\nTE+[f[(_,\\)].    Let[\\\nold\n1\n, ...,\\\nold\nl\n]=([\\\n1\n, ...,\\\nk\n]_frv(TE,{))\"frv((_,\\),.).\nLet[\\\nnew\n1\n, ...,\\\nnew\nl\n]be   distinct   new   region   variables,   new   in   the   sense   that\n[\\\nnew\n1\n, ...,\\\nnew\nl\n]&frv(S(_,\\),.)=<. 
LetS$=S+([],[\\\nold\n1\n[\\\nnew\n1\n, ...,\\\nold\nl\n[\\\nnew\nl\n],[]).\nThen\nS$  is a region  renaming on ([\\\n1\n, ...,\\\nk\n],TE,{,\\) with respect to..(78)\nLetTE$=S$(TE)  and lete$\n0\n=S$(e$ ).  By Lemma 5.3 on  (74)  we have\nTE$+[f[(S$(_),\\$)]|&*x.eO*x.e$\n0\nat\\$: (S${,\\$),[put(\\$)],(79)\nwhere  we  have  usedS$(\\)=\\$.  SinceS$  is  the  identity  on  every  type  and  effect\nvariable,  we have\nS$(_)=\\S$\\\n1\n}}}S$\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.S$({).(80)\nMoreover,\n([S$\\\n1\n, ...,S$\\\nk\n],[:\n1\n, ...,:\nn\n],[=\n1\n, ...,=\nm\n])&fv(TE$,\\$)=<(81)\nsinceS$   is   injective   on   frv([\\\n1\n, ...,\\\nk\n],TE,\\).   DefineR\n0\nas   follows.   Let\nDom(R\n0\n)=frv(e$\n0\n)\"[S$(\\\n1\n), ...,S$(\\\nk\n)].  From  (74)  and  Rule  23  we  get  frv(e$)\u001f\nfrv(TE+[f[(_,\\)],{).  By  (78),  for  every\\$#e$\n0\nthere exists a  unique\\# frv(e$)\nsuch  thatS$(\\)=\\$.  LetR\n0\n(\\$)=R$(\\).  The  closures(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)\nand(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)are  now  equal.  Moreover,  by  (78),R\n0\nandR$\nagree on.. But  then,  by (75),  we have\nR\n0\nandRagree on.frv(e$\n0\n)\u001fDom(R\n0\n)_[S$\\\n1\n, ...,S$\\\nk\n].(82)\nBy Lemma 8.2  on  (76),  using thatR\n0\nandR$ agree on.,  we  get\nC(R\n0\n,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t...(83)\n145\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261338 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2999 Signs:  2002 . Length:   52 pic 10 pts,   222 mm\nNotice  thatS$  is  a  region  renaming  ofTE+[f[(_,\\)]with  respect  to..  Thus\nfrom  (83)  we get that  the claim\nC(R\n0\n,TE$+[f[(S$(_),\\$)],E+[f[v],s,VE) w.r.t..(84)\nis  a  consequence  of  the  lemma.  
By  Rule 34  on  (79),  (80),  (81),  (82),  and  (84)  we\nhave\nC(R,(S$(_),\\$),(x,e,E,f),s,(S$\\\n1\n, ...,S$\\\nk\n,x,e$\n0\n,VE,R\n0\n)) w.r.t..,(85)\nwhich is the desired result.\n[Rule  35].By  (73)  and  Rule  35  we  have  that_is  simple  and  takes  the  form\n\\().{andC(R,({,\\),v,s,sv) w.r.t... Thus the claimC(R,S$({,\\),v,s,sv) w.r.t..\nis a consequence of  the lemma. ThusC(R,(S$(_),\\$),v,s,sv)  w.r.t..,  as desired.\nThe cases for the remaining rules  (Rules 36\u001538) are straightforward.\n8.4.  Region Allocation\nConsistency  is  not  in  general  preserved  under  increasing  effects  or  shrinking\nstores.  For  example,  for  all  addressesa,  we  haveC([\\[r],(int,\\), 3,[],a)\nw.r.t..if.=<, but not if.=[get(\\)],  since the store is empty. Yet there is one\npoint  where  we  do  need  to  increase  effects,  namely  in  the  case  of  the  main  proof\nconcerning expressions of the form\ne$#letregion\\ine$\n1\nend.\nStarting   from   an   assumption   of   the   formC(R,TE,E,s,VE) w.r.t..we   wish\nto   extendswith   a   new   region,   yieldings$=s+[r[[]],   increase.to\n._[put(\\),get(\\)](the  get  and  put  effects  representing  the  effects  ofe$\n1\non  the\nnew   region)   and   still   be   able   to   claimC(R+[\\[r],TE,E,s$,VE)   w.r.t.\n._[put(\\),get(\\)].  That  this  is  possible  is  not  trivial,  for  the  regionrmay  have\nbeen in use earlier (and there may even be dead pointers into the old region named\nr).  However,  if  we  extend  the  observing  effect  with  a  region  variable  which  is  not\nfree in the  type  environment,  then consistency  reallyispreserved:\nLemma8.4.IfC(R,TE,E,s,VE)  w.r.t..  and  \\\u0012frv(TE,.),r\u0012Dom(s)and\nfrv(.$)\u001f[ \\]    thenC(R+[\\[r],TE,E,s+[r[[]],VE)    w.r.t..$_..\nSimilarly for  the  other forms ofC.\nProof.The proof  is by rule-based co-induction. We assume\nfrv(.$)\u001f[\\](86)\nr\u0012Dom(s).(87)\nFor  brevity,  lets$=s+[r[[]].  
We  now  have  a  case  analysis  with  one  case  for\neach of Rules 31 to 38.\n146\nTOFTE  AND TALPIN\n\nFile: 643J261339 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2489 Signs:  1340 . Length:   52 pic 10 pts,   222 mm\nTypes and Storable  Values[C(R,+,v,s,sv) w.r.t..].Assume\nC(R,({,\\\n0\n),v,s,sv) w.r.t..(88)\n\\\u0012frv(({,\\\n0\n),.).(89)\nThen (88)  is the conclusion of one of the following rules:\n[Rule 31].Herev=sv=i,  for somei# Int and{=int. HenceC(R+[\\[r],\n({,\\\n0\n),v,s$,sv) w.r.t.._.$ by Rule 31 itself.\n[Rule 32].Here  (88)  is inferred from premises\nTE|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)](90)\nC(R\n0\n,TE,E,s,VE) w.r.t..(91)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)(92)\nv=(x,e,E)andsv=(x,e$,VE,R\n0\n)(93)\nWithout loss of generality we can assume\n\\\u0012frv(TE)(94)\nfor  if\\# frv(TE)  we  can  do  the  following.  Let\\$  be  a  fresh  region  variable,  fresh\nin the sense that\\$\u0012frv(TE,.,{). Consider the substitutionS=[\\[\\$]. By (89)\nand Lemma 5.3  on (90) we have\nS(TE)|&*x.eO*x.S(e$)at\\\n0\n:({,\\\n0\n):[put(\\\n0\n)].(95)\nMoreover,Sis  a  region renaming  ofTEwith respect  to.,  so Lemma 8.3  on  (91)\ngives\nC(R\n0\n,S(TE),E,s,VE) w.r.t...(96)\nLetR$\n0\nbe  the  region  environment  defined  as  follows.  If\\\u0012Dom(R\n0\n)  then  let\nR$\n0\n=R\n0\n.   Otherwise   letR$\n0\nhave  domain  Dom(R$\n0\n) =Dom(R\n0\n)\"[\\]_[\\$]and\nvalues\nR$\n0\n(\\$\n0\n)=\n{\nR\n0\n(\\$\n0\n)\nR\n0\n(\\)\nif\\$\n0\n{\\\nif\\$\n0\n=\\$.\nLetsv$=(x,S(e$),VE,R$\n0\n).  Since  frv(e$) \u001fDom(R\n0\n)  we  have  thatsvandsv$  are\nequal  and  frv(S(e$)) \u001fDom(R$\n0\n).  Also,R$\n0\nandR\n0\nagree  on.(since  either\\$  nor\n\\is  free  in.). Thus by Lemma 8.2 on  (96)  we have\nC(R$\n0\n,S(TE),E,s,VE) w.r.t...(97)\nThus we  can assume that (94)  holds.\n147\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261340 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2681 Signs:  1386 . Length:   52 pic 10 pts,   222 mm\nBy  (91)  and  (94)  we have that  the claim\nC(R\n0\n+[\\[r],TE,E,s$,VE) w.r.t.._.$(98)\nis itself a conclusion of the lemma. Moreover,  from (92)  and (86) we have\nR\n0\n+[\\[r]andR+[\\[r]agree on._.$.(99)\nBy Rule 32 on (90), (98), (99) and the fact that frv(e$)\u001fDom(R\n0\n+[\\[r]) we get\nC(R+[\\[r],({,\\\n0\n),v,s$,sv$) w.r.t.._.$(100)\nwheresv$=(x,e$,VE,R\n0\n+[\\[r]).   By   (90)   and   Rule 23   we   have   frv(e$)\u001f\nfrv(TE,{)  so  by  (89)  and  (94)  we  have\\\u0012frv(e$).  Thussvandsv$  are equal;  thus\n(100)  is  the desired result.\n[Rule 33].Similar  to the previous case.\nType Schemes and Storable Values[C(R,(_,\\\n0\n),v,s,sv) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,sv) w.r.t..(101)\n\\\u0012frv((_,\\\n0\n),.),(102)\nwhere (101) must be  the conclusion of one of the following rules:\n[Rule 34].Here_is compound and there existTE,f,x,e,\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,\n=\n1\n, ...,=\nm\n,R\n0\n,  andVEsuch that\nTE+[f[(_,\\\n0\n)]|&*x.eO*x.e$at\\\n0\n:({,\\\n0\n),[put(\\\n0\n)](103)\n_=\\\\\n1\n}}}\\\nk\n\\:\n1\n}}}:\nn\n\\=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\\n0\n)=<(104)\nR\n0\nandRagree on.frv(e$)\u001fDom(R\n0\n)_[\\\n1\n,...,\\\nk\n](105)\nC(R\n0\n,TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s,VE) w.r.t..(106)\nv=(x,e,E,f)andsv=(\\\n1\n, ...,\\\nk\n,x,e$,VE,R\n0\n).(107)\nAs in the  case  for Rule 32  we may assume\n\\\u0012frv(TE+[x[(_,\\\n0\n)])(108)\nwithout loss  of  generality. By (106)  and (108) we get that the claim\nC(R\n0\n+[\\[r],TE+[f[(_,\\\n0\n)],E+[f[(x,e,E,f)],s$,VE) w.r.t.._.$\n(109)\nis a consequence of the lemma. LetR$\n0\n=R\n0\n+[\\[r]and letR$=R+[\\[r].By\n(105) and  (102) we have\nR$\n0\nandR$ agree on._.$(110)\n148\nTOFTE  AND TALPIN\n\nFile: 643J261341 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3296 Signs:  2091 . 
Length:   52 pic 10 pts,   222 mm\nThus by Rule 34  on  (103),  (110)  and (109)  we have\nC(R$, (_,\\\n0\n),v,s$,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)) w.r.t.._.$(111)\nFrom (103) and Rule 23 we have frv(e$) \u001ffrv(TE+[f[(_,\\\n0\n)],{). This with (108)\ngives  that  if\\# frv(e$)  then\\#[\\\n1\n, ...,\\\nk\n].  Thussvand(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$\n0\n)\nare equal,  so  (111)  really is the desired result.\n[Rule 35].Here_is simple. Write_in the form\\().{. Then\\\u0012frv(({,\\\n0\n),.),\nby  (102).  By  (101)  and  Rule 35  we  haveC(R,({,\\\n0\n),v,s,sv) w.r.t... But  then  the\nclaimC(R+[\\[r],({,\\\n0\n),v,s$,sv) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThusC(R+[\\[r],(_,\\\n0\n),v,s$,sv) w.r.t.._.$,  by Rule 35.\nType Schemes  and  Addresses[C(R,(_,\\\n0\n),v,s,v$) w.r.t..].    Assume that\nC(R,(_,\\\n0\n),v,s,v$) w.r.t..(112)\n\\\u0012frv(_,\\\n0\n,.).(113)\nThen (112) is  the conclusion of one of the following rules:\n[Rule 36].HereR(\\\n0\n)=rofv$,v$ # Pdom(s)  and\nC(R,(_,\\\n0\n),v,s,s(v$)) w.r.t...(114)\nBy  (113)  we  have  (R+[\\[r])(\\\n0\n)=R(\\\n0\n)=rofv$.  Sincer\u0012Dom(s)  we  have\nv$ # Pdom(s$)   ands$(v$)=s(v$).   By   (114)   and   (113)   we   have   that   the   claim\nC(R+[\\[r],(_,\\\n0\n),v,s$,s$(v$)) w.r.t.._.$  is  a  consequence  of  the  lemma.\nThen, by Rule 36, we haveC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$, as desired.\n[Rule 37].Sinceget(\\\n0\n)\u0012.and (86)  and,  by  (113),\\{\\\n0\n,  we haveget(\\\n0\n)\u0012\n._.$. ThusC(R+[\\[r],(_,\\\n0\n),v,s$,v$) w.r.t.._.$,  by Rule 37 itself.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].    The  case  for  Rule 38  is  straight-\nforward.K\nLemma8.5.IfC(R,TE,E,s,VE) w.r.t.. thenC(R,TE,E,s,VE) w.r.t.._[=].\nSimilarly for  the  other forms ofC.\nProof.Straightforward  co-inductive proof.K\n8.5.  Recursion\nThe   source   and   target   languages   handle   recursion   differently.   
The   source\nlanguage ``unrolls''  a  closure each  time  a recursive  function  is  applied\u0015\u0015see Rule 5.\nIn the target language a closure for a recursive function contains a pointer back to\nitself\u0015\u0015see Rule 14. To prove the correctness of our translation,  we must show that\nthe two representations are consistent at the point where we create the cycle in the\nstore.\nLemma8.6.IfC(R,TE,E,s,VE)w.r.t..  and   _  is  a  compound  type  scheme\n\\\\\u0011:\u0011=\u0011.{\n\u0014\n,withbv(_)&fv(TE,\\)=<,and   TE+[f[(_,\\)]|&*x.eO*x.e$at\\:\n149\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261342 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3081 Signs:  1915 . Length:   52 pic 10 pts,   222 mm\n({,\\),[put(\\)]  and  R$and  R  agree  on  .  andfrv(e$)\u001fDom(R$)_frv(\\\u0011)and\nR(\\)=r  and r# Dom(s)and o\u0012Dom(s(r))then\nC(R,TE+[f[(_,\\)],E+[f[(x,e,E,f)],\ns+[(r,o)[(\\\u0011,x,e$,VE$,R$)],VE$) w.r.t..,\nwhere VE$=VE+[f[(r,o)].\nProof.LetTE$=TE+[f[(_,\\)],E$=E+[f[(x,e,E,f)],VE$=VE+\n[f[(r,o)]ands$=s+[(r,o)[(\\\u0011,x,e$,VE$,R$)].  By  Lemma 8.2  it  suffices  to\nprove\nC(R$,TE$,E$,s$,VE$) w.r.t...\nThe proof is  by co-induction. Let\nq\n1\n=(R$, (_,\\),(x,e,E,f),s$,(\\\u0011,x,e$,VE$,R$),.)\nq\n2\n=(R$, (_,\\),(x,e,E,f),s$, (r,o),.)\nq\n3\n=(R$,TE$,E$,s$,VE$,.).\nLet1$=1\n*\n_[q\n1\n,q\n2\n,q\n3\n]and  show1$\u001fF(1$).  We  considerq\n1\n,q\n2\n,  andq\n3\nin\nturn.\n[q\n1\n].    Sinceq\n3\n#1$   and_=\\\\\u0011:\u0011=\u0011.{\n\u0014\n,   with   bv(_)&fv(TE,\\)=<,   andTE+\n[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]andR$  agrees  with  itself  on.\nand frv(e$)\u001fDom(R$)_frv(\\\u0011) we haveq\n1\n#F(1$),  by rule 42.\n[q\n2\n].    Ifget(\\)\u0012.thenq\n2\n#F(1$), by Rule 45. Assumeget(\\)#.. SinceRand\nR$ agree on.we haveR$(\\)=R(\\)=r. Since alsor# Dom(s$) andq\n1\n#1$ we have\nq\n2\n#F(1$),  by rule 44.\n[q\n3\n].    By Lemma 8.2 onC(R,TE,E,s,VE) w.r.t..we haveC(R$,TE,E,s$,VE)\nw.r.t... 
Thus Dom(TE)=Dom(E)=Dom(VE) and for everyx# Dom(TE) we have\nC(R$,TE(x),E(x),s$,VE(x)) w.r.t..,  i.e.,  forx{f,C(R$,TE$(x),E$(x),s$,VE$(x))\nw.r.t... Since alsoq\n2\n#1$ we haveq\n3\n#F(1$) by Rule 46.\n9.  PROOF  OF THE CORRECTNESS  OF THE TRANSLATION\nThis section is the proof of Theorem 6.1. The proof is by depth of the derivation\nofE|&e\u0014v,   each   with   an   inner   induction   on   the   depth   of   inference   of\nTE|&eOe$:+,.. There are seven cases, one for each rule in the dynamic semantics\nof  the  source  language.  For  each  of  these  cases,  the  inner  induction  consists  of  a\nbase  case,  in  whichTE|&eOe$:+,.was  inferred  by  one  of  thesyntax-directed\nrules  (i.e.,  rules 20\u001526) plus  an inductive step,  where  Rule 27 or 28  was applied. It\nturns  out  the  the  inner  inductive  steps  are  independent  ofe,  so  we  start  out  by\ndoing  them  once  and  for  all.  Then  we  deal  with  each  of  the  seven  syntax-directed\ncases.\n150\nTOFTE  AND TALPIN\n\nFile: 643J261343 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2075 Signs:   925 . Length:   52 pic 10 pts,    222 mm\nIn all the cases,  we assume\nTE|&eOe$:+,.(115)\nC(R,TE,E,s,VE) w.r.t.._.$(116)\nE|&e\u0014v(117)\nRconnects._.$tos(118)\nR$ andRagree on._.$(119)\nfrv(e$)\u001fDomR$.(120)\n[Inner  inductive  step  (a):  Rule 27  was  applied].    Assume  that  (115)  takes  the\nform\nTE|&eOletregion\\ine$\n1\nend:+,.(121)\nand is  inferred by Rule 27 from the premises\nTE|&eOe$\n1\n:+,.\n+\n(122)\n.=.\n+\n\"[put(\\),get(\\)](123)\n\\\u0012frv(TE,+).(124)\nBy  Lemma 5.3  we  can  choose\\such  that\\\u0012frv(.$)  as  well  as  (123)\u0015(124).\nThus\\\u0012frv(TE,._.$).  Letrbe  an  address  satisfyingr\u0012Dom(s).  LetR\n+\n=\nR+[\\[r]ands\n+\n=s+[r[[]].  Then by Lemma 8.4 on  (116)  we get\nC(R\n+\n,TE,E,s\n+\n,VE) w.r.t..\n+\n_.$.(125)\nLetR$\n+\n=R$+[\\[r]. 
By  (118) we have\nR\n+\nconnects.\n+\n_.$tos\n+\n(126)\nand by (119)\nR$\n+\nandR\n+\nagree on.\n+\n_.$.(127)\nBy  (120)  we have\nfrv(e$\n1\n) \u001fDomR$\n+\n.(128)\nBy the inner induction applied to (122),  (125),  (117),  (126),  (127),  and (128) there\nexists$\n1\nandv$  such that\ns\n+\n,VE,R$\n+\n|&e$\n1\n\u0014v$,s$\n1\n(129)\nC(R$\n+\n,+,v,s$\n1\n,v$) w.r.t..$(130)\n151\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261344 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3107 Signs:  2038 . Length:   52 pic 10 pts,   222 mm\nLets$=s$\n1\n\"\"[r]. Rule 15 on  (129)  gives\ns,VE,R$|&letregion\\ine$\n1\nend\u0014v$,s$\nNote thatR$\n+\nandR$ agree on.$(as\\\u0012frv(.$)). Also,s$\n1\na(Rng(R$afrv(.$)))C\n=\ns$\nby (118) and (119). Then by Lemma 8.2 on (130) we getC(R$,+,v,s$,v$) w.r.t..$, as\nrequired.\n[Inner  inductive  step  (b):  Rule 28  was  applied].    Assume  that  (115)  is  inferred\nby  Rule 28  on  premisesTE|&eOe$:+,.\n+\n,.=.\n+\n\"[=]and=\u0012fev(TE,+).  By\nLemma 8.5  on  (116)  we  getC(R,TE,E,s,VE) w.r.t..\n+\n_.$.  Also,Rconnects\n.\n+\n_.$tos;R$ andRagree on.\n+\n_.$ and frv(e$)\u001fDom(R$). Thus by the inner\ninduction  there  exists$  andv$ such thats,VE,R$|&e$\u0014v$,s$ andC(R$,+,v,s$,v$)\nw.r.t..$,  as desired.\n[The syntax-directed cases].\n[Constant, Rule 1].    SinceRconnects[put(\\)]_.$tosandR$ andRagree\non[put(\\)]_.$ we have thatr=R$(\\) exists andr# Dom(s). Takeo\u0012Dom(s(r)).\nBy   Rule 8   we   then   haves,VE,R$|&cat\\\u0014(r,o),s+[(r,o)[c].   Letting\nv$=(r,o) ands$=s+[(r,o)[c]we furthermore getC(R$, (int,\\),v,s$,v$) w.r.t.\n.$,  by  (36),  (35) and  (31),  as desired.\n[Variable, Rule 2].    There are two cases, depending on whetherTEassociates\na simple or a compound type scheme with the variable. We deal with each of these\nin turn:\n[Variable with simple type scheme].Assume that (115) was inferred using\nRule 21. Thene=e$=x, for some variablex. Moreover,TE(x)=(_,p), for somep\nand simple_. 
Let{be the type for which_=\\().{. Then+=({,p) and.=<. The\nevaluation  (117)  must  have  been  by  Rule 2,  so  we  havev=E(x).  Lets$=s.By\n(115)   and   (116)   we   havex# Dom(VE).   Thus,   lettingv$=VE(x),   we   have\ns,VE,R$|&x\u0014v$,s$,  as  desired.  By  Rule 38  on  (116)  we  haveC(R,(_,p),v,s$,v$)\nw.r.t..$,  i.e.,C(R,({,p),v,s$,v$) w.r.t..$,  as  desired  (recall  that  we  identify\\().{\nand{).\n[Variable  with  compound  type  scheme].Assume  that  (115)  was  obtained\nby  Rule 22.  Theneis  a  variable,f;e$  is  of  the  formf[S(\\\n1\n), ...,S(\\\nk\n)]atpand\n+=({,p),  for some{; and\nTE|&fOf[S\\\n1\n, ..., S\\\nk\n]atp:({,p),.(131)\nwas inferred by  application of Rule 22  to the premises\nTE(f)=(_,p$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n(132)\n_\u001e{viaS(133)\n.=[get(p$),put(p)].(134)\n152\nTOFTE  AND TALPIN\n\nFile: 643J261345 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2685 Signs:  1472 . Length:   52 pic 10 pts,   222 mm\nThen (117) must  have been inferred by Rule 2,  so we havev=E(f). By  (116)  and\nf# Dom(TE) we have\nC(R,(_,p$),v,s,v$\n1\n) w.r.t.._.$\nwherev$\n1\n=VE(f).  Sinceget(p$) #.,  the  definition  ofC(rules  36  and  34)  gives\nv$\n1\n# Pdom(s)  androfv$\n1\n=R(p$) andvis a recursive closure\nv=(x\n0\n,e\n0\n,E\n0\n,f\n0\n)(135)\nands(v$\n1\n)=(\\\n1\n, ...,\\\nk\n,x\n0\n,e$\n0\n,VE\n0\n,R\n0\n),  for  somee$\n0\n,VE\n0\nandR\n0\n.  Furthermore,\nthere existTE\n0\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\nand{\n0\nsuch that\nC(R\n0\n,TE\n0\n+[f\n0\n[(_,p$)],E\n0\n+[f\n0\n[v],s,VE\n0\n) w.r.t.._.$(136)\nTE\n0\n+[f\n0\n[(_,p$)]|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp$: ({\n0\n,p$),[put(p$)](137)\nbv(_)&fv(TE\n0\n,p$)=<(138)\nR\n0\nandRagree on._.$(139)\nfrv(e$\n0\n) \u001fDomR\n0\n_[\\\n1\n, ...,\\\nk\n].(140)\nWithout loss of generality,  we can assume that\\\n1\n, ...,\\\nk\nare chosen so as to satisfy\n[\\\n1\n, ...,\\\nk\n]&frv(.$)=<.(141)\nBy  (134),  (118),  and  (119)  we  haveR$(p) # Dom(s). 
 Letr$=R$(p).  Leto$bean\noffset  not  in  Dom(s(r$)).  Letv$=(r$,o$),  letR$\n0\n=R\n0\n+[\\\ni\n[R$(S(\\\ni\n)); 1\u001di\u001dk]\nand  letsv=(x\n0\n,e$\n0\n,VE\n0\n,R$\n0\n).  Notice  thatR$(S(\\\ni\n))  exists,  by  (120).  Lets$=\ns+[(r$,o$)[sv]. It follows from Rule 10 that\ns,VE,R$|&f[S(\\\n1\n), ...,S(\\\nk\n)]atp\u0014v$,s$,(142)\nas desired.  It remains to prove that\nC(R$, ({,p),v,s$,v$) w.r.t..$(143)\nWe now consult Rules 31\u001538 concerningC.Ifget(p)\u0012.$, we are done. But even if\nget(p)#.$ we havev$ # Pdom(s$) androfv$=r$=R$(p) as required by Rule 36. It\nremains to  prove that\nC(R$, ({,p),v,s$,sv) w.r.t..$.(144)\nLetTE=TE\n0\n+[f\n0\n[(_,p$)].  Since  (137)  must  have  been  inferred  by  Rules 23\nand 28 we equally have\nTE|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:({,p),[put(p)](145)\n153\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261346 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2220 Signs:  1157 . Length:   52 pic 10 pts,   222 mm\nFrom (119),  (139)  and[\\\n1\n, ...,\\\nk\n]&frv(.$)=<we get\nR$\n0\nandR$ agree on.$.(146)\nFrom Lemma 8.2 on  (136)  we get\nC(R$\n0\n,TE,E\n0\n+[f\n0\n[v],s$,VE\n0\n) w.r.t..$.(147)\nFrom (140) we get\nfrv(e$\n0\n) \u001fDomR$\n0\n.(148)\nBy  Rule 33  on  (145),  (146),  (147),  and  (148)  we  haveC(R$, ({,p),v,s$,(x\n0\n,e$,\nVE\n0\n,R$\n0\n)) w.r.t..$  as desired.\n[Lambda  abstraction,  Rule 3].Assume  that  (115)  was  inferred  by  Rule 23;\nthen  (115)  takes the following form:\nTE|&*x.e\n1\nO*x.e$\n1\natp:+,[put(p)].(149)\nMoreover,  (117) was inferred by Rule 3 yielding\nv=(x,e\n1\n,E).(150)\nSinceRconnects.toswe haveR(p) # Dom(s). Letr=R(p) and letobe an offset\nnot  in  Dom(s(r)).  Letv$=(r,o)  ands$=s+[v$[(x,e$\n1\n,VE,R$)].  By  (119)  we\nhaveR$(p)=r.  Thus by Rule 11 we have\ns,VE,R$|&*x.e$\n1\natp\u0014v$,s$.(151)\nNotice  thatC(R$,TE,E,s$,VE)  w.r.t..$,  by  Lemma 8.2  and  (119).  Also  frv(e$\n1\n)\u001f\nDomR$,   by   (120).   
Thus   by   Rules   32,   35,   and   36   (or   by   (37))   we   have\nC(R,+,v,s$,v$) w.r.t..$ as required.\n[Application  of  non-recursive  closure,  Rule 4].    Heree#e\n1\ne\n2\n,  for  somee\n1\nande\n2\n, ande$#e$\n1\ne$\n2\n,  for somee$\n1\nande$\n2\nand (115) was inferred by Rule 24 on the\npremises\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(152)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(153)\n.=.\n1\n_.\n2\n_[=,get(p)]_.\n0\n.(154)\n154\nTOFTE  AND TALPIN\n\nFile: 643J261347 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2703 Signs:  1229 . Length:   52 pic 10 pts,   222 mm\nMoreover,  (117)  was inferred by Rule 4 on the premises\nE|&e\n1\n\u0014v\n1\n,v\n1\n=(x\n0\n,e\n0\n,E\n0\n)(155)\nE|&e\n2\n\u0014v\n2\n(156)\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(157)\nLet.$\n1\n=.\n2\n_[=,get(p)]_.\n0\n_.$,  i.e.,  the  effect  that  remains  after  the  computa-\ntion ofe$\n1\n.  Note that._.$=.\n1\n_.$\n1\n;  so from  (116),  (118),  and (119) we get\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(158)\nRconnects.\n1\n_.$\n1\ntos(159)\nR$ andRagree on.\n1\n_.$\n1\n.(160)\nAlso,  from  (120),  we get\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(161)\nBy  induction  on  (152),  (158),  (155),  (159),  (160),  and  (161)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(162)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(163)\nNotice   thatget(p)#.$\n1\n.   Thus,   by   the   definition   ofC,   (163)   tells   us   that\nv$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and there existe$\n0\n,VE\n0\n,TE\n0\nandR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(164)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](165)\nC(R\n0\n,TE\n0\n,E\n0\n,s\n1\n,VE\n0\n) w.r.t..$\n1\n(166)\nR\n0\nandR$ agree on.$\n1\n(167)\nfrv(e$\n0\n) \u001fDomR\n0\n.(168)\nLet.$\n2\n=[=,get(p)]_.\n0\n_.$,  i.e.,  the effect that remains after the computation of\ne$\n2\n.  
By  Lemma 4.1  on  (162)  we  havesC\n=\ns\n1\n.  Furthermore,  we  have.\n2\n_.$\n2\n\u001f\n._.$,  so by Lemma 8.1 on (116) we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(169)\nAlso,  from  (118) and  (119) we get\nRconnects.\n2\n_.$\n2\ntos\n1\n(170)\nR$ andRagree on.\n2\n_.$\n2\n.(171)\n155\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261348 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2402 Signs:  1225 . Length:   52 pic 10 pts,   222 mm\nBy  induction  on  (153),  (169),  (156),  (170),  (171),  and  (161)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(172)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(173)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (165)  must  have  been  inferred  by  Rules  23\nand 28.  Thus there exists a.$\n0\nsuch that.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(174)\nWe  have  s\n1\nC\n=\ns\n2\n,  by  Lemma 4.1  on  (172).  By  Lemma 8.2  on  (166),  (167),  and\n.$\n0\n\u001f.\n0\nwe then have\nC(R$,TE\n0\n,E\n0\n,s\n2\n,VE\n0\n) w.r.t..$\n0\n_.$(175)\nand by Lemma 8.1  on  (173) and.$\n0\n\u001f.\n0\nwe get\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n0\n_.$.(176)\nLetE\n+\n0\n=E\n0\n+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  Combining  (175)  and\n(176)  we get\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(177)\nAlso,  by  (118),  (119),  andsC\n=\ns\n2\nwe get\nR$ connects.$\n0\n_.$tos\n2\n(178)\nand by (167)\nR\n0\nandR$  agree on.$\n0\n_.$.(179)\nThen by induction on (174), (177), (157), (178),  (179), and (168) there exists$ and\nv$ such that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(180)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(181)\nFrom  (162),  (164),  (172),  and  (180)  we  gets,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as  desired.\nMoreover,  by Lemma 8.2 on (181) and (167),  we haveC(R$,+,v,s$,v$) w.r.t..$, as\ndesired.\n[Application  of  recursive  closure,  Rule 5].    
This  case  is  similar  to  the  previous\ncase,  but we include it for the sake of completeness. We havee#e\n1\ne\n2\n,  for somee\n1\n156TOFTE  AND TALPIN\n\nFile: 643J261349 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2587 Signs:  1140 . Length:   52 pic 10 pts,   222 mm\nande\n2\n,  ande$#e$\n1\ne$\n2\n,  for  somee$\n1\nande$\n2\nand,  by  Rule 24,  there  exist+$,p,=,.\n0\n,\n.\n1\nand.\n2\nsuch that\nTE|&e\n1\nOe$\n1\n:(+$ww\u0014\n=..\n0\n+,p),.\n1\n(182)\nTE|&e\n2\nOe$\n2\n:+$,.\n2\n(183)\n.=.\n1\n_.\n2\n_.\n0\n_[get(p),=].(184)\nAlso,  assume  that  (117)  was inferred  by application  of Rule 5  on premises\nE|&e\n1\n\u0014v\n1\nv\n1\n=(x\n0\n,e\n0\n,E\n0\n,f)(185)\nE|&e\n2\n\u0014v\n2\n(186)\nE\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v.(187)\nTo use induction the first time,  we split the effect._.$ into.\n1\n_.$\n1\n,  where.$\n1\n=\n.\n2\n_.\n0\n_[get(p),=]_.$. By  (116),  (118)  and (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(188)\nRconnects.\n1\n_.$\n1\ntos(189)\nR$ andRagree on.\n1\n_.$\n1\n.(190)\nAlso,  by (120),  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(191)\nBy  induction  on  (182),  (188),  (185),  (189),  (190),  and  (191),  there  existv$\n1\nands\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(192)\nC(R$, (+$ww\u0014\n=..\n0\n+,p),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(193)\nNotice  thatget(p)#.$\n1\n.  Thus  by  (193)  and  the  rules  forC(Rules  33,  35,  and  36)\nwe  havev$\n1\n# Pdom(s\n1\n)  androfv$\n1\n=R$(p)  and  there  existe$\n0\n,VE\n0\n,TE\n0\n,  andR\n0\nsuch that\ns\n1\n(v$\n1\n)=(x\n0\n,e$\n0\n,VE\n0\n,R\n0\n)(194)\nTE\n0\n|&*x\n0\n.e\n0\nO*x\n0\n.e$\n0\natp:(+$ww\u0014\n=..\n0\n+,p),[put(p)](195)\nC(R\n0\n,TE\n0\n,E\n0\n+[f[v\n1\n],s\n1\n,VE\n0\n) w.r.t..$\n1\n(196)\nR\n0\nandR$ agree on.$\n1\n(197)\nfrv(e$\n0\n) \u001fDomR\n0\n.(198)\n157\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261350 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. 
Page 01:01\nCodes:  2304 Signs:  1080 . Length:   52 pic 10 pts,   222 mm\nTo use induction a second time, we split the remaining effect.$\n1\ninto.\n2\n_.$\n2\n, where\n.$\n2\n=.\n0\n_[get(p),=]_.$. We havesC\n=\ns\n1\n, by Lemma 4.1. Then, by Lemma 8.1 on\n(116),  we have\nC(R,TE,E,s\n1\n,VE) w.r.t..\n2\n_.$\n2\n.(199)\nMoreover,  (118) and  (119)  imply that\nRconnects.\n2\n_.$\n2\ntos\n1\n(200)\nR$ andRagree on.\n2\n_.$\n2\n.(201)\nBy  induction  on  (183),  (199),  (186),  (200),  (201),  and  (191)  there  exists\n2\nandv$\n2\nsuch that\ns\n1\n,VE,R$|&e$\n2\n\u0014v$\n2\n,s\n2\n(202)\nC(R$,+$,v\n2\n,s\n2\n,v$\n2\n) w.r.t..$\n2\n.(203)\nLetTE\n+\n0\n=TE\n0\n+[x\n0\n[+$].  Now  (195)  must  have  been  inferred  by  Rules  23  and\n28. Thus there exists  an effect.$\n0\nwith.$\n0\n\u001f.\n0\nand\nTE\n+\n0\n|&e\n0\nOe$\n0\n:+,.$\n0\n.(204)\nBy Lemma 8.2  on  (196)  and (197) we have\nC(R$,TE\n0\n,E\n0\n+[f[v\n1\n],s\n2\n,VE\n0\n) w.r.t..$\n2\n.(205).\nLetE\n+\n0\n=E\n0\n+[f[v\n1\n]+[x\n0\n[v\n2\n]and  letVE\n+\n0\n=VE\n0\n+[x\n0\n[v$\n2\n].  From  (205)\nand (203)  and.$\n0\n\u001f.\n0\nwe have\nC(R$,TE\n+\n0\n,E\n+\n0\n,s\n2\n,VE\n+\n0\n) w.r.t..$\n0\n_.$.(206)\nFrom (197) we get\nR\n0\nandR$  agree on.$\n0\n_.$.(207)\nBy  (118),  (119)  andsC\n=\ns\n2\nwe get\nR$  connects.$\n0\n_.$tos\n2\n.(208)\nBy  induction  on  (204),  (206),  (187),  (208),  (207),  and  (198)  there  exists$  andv$\nsuch that\ns\n2\n,VE\n+\n0\n,R\n0\n|&e$\n0\n\u0014v$,s$(209)\nC(R\n0\n,+,v,s$,v$) w.r.t..$.(210)\n158\nTOFTE  AND TALPIN\n\nFile: 643J261351 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2328 Signs:  1073 . Length:   52 pic 10 pts,   222 mm\nRule 12  on  (192),  (202),  (194),  and (209)  givess,VE,R$|&e$\n1\ne$\n2\n\u0014v$,s$,  as desired.\nMoreover,   Lemma 8.2   on   (210)   and   (207)   gives   the   desiredC(R$,+,v,s$,v$)\nw.r.t..$.\n[letexpressions,  Rule 6].    
Assume  that  (115)  was inferred  by Rule 25;  then\n(115)  takes the form\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,..(211)\nMoreover,  (115)  and  (117)  must be inferred by Rules 25 and 6 from the premises\nTE|&e\n1\nOe$\n1\n:({\n1\n,p\n1\n),.\n1\n(212)\nTE+[x[({\n1\n,p\n1\n)]|&e\n2\nOe$\n2\n:+,.\n2\n(213)\n.=.\n1\n_.\n2\n(214)\nE|&e\n1\n\u0014v\n1\n(215)\nE+[x[v\n1\n]|&e\n2\n\u0014v.(216)\nLet.$\n1\nbe  the  effect  that  remains  after  the  evaluation  ofe$\n1\n;  i.e.,  let.$\n1\n=.\n2\n_.$.\nNote that._.$=.\n1\n_.$\n1\n,  so  by (116),  (118)  and  (119)  we have\nC(R,TE,E,s,VE) w.r.t..\n1\n_.$\n1\n(217)\nRconnects.\n1\n_.$\n1\ntos(218)\nR$ andRagree on.\n1\n_.$\n1\n.(219)\nBy  (120)  we have\nfrv(e$\n1\n)\u001fDomR$7frv(e$\n2\n)\u001fDomR$.(220)\nBy  induction  on  (212),  (217),  (215),  (218),  (219),  and  (220)  there  exists\n1\nandv$\n1\nsuch that\ns,VE,R$|&e$\n1\n\u0014v$\n1\n,s\n1\n(221)\nC(R$, ({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(222)\nBy Lemma 8.2  on  (222)  we get\nC(R,({\n1\n,p\n1\n),v\n1\n,s\n1\n,v$\n1\n) w.r.t..$\n1\n.(223)\nBy Lemma 8.1  on  (116)  we get\nC(R,TE,E,s\n1\n,VE) w.r.t..$\n1\n.(224)\n159\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261352 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2917 Signs:  1237 . Length:   52 pic 10 pts,   222 mm\nCombining these two,  we get\nC(R,TE+[x[({\n1\n,p\n1\n)],E+[x[v\n1\n],s\n1\n,VE+[x[v$\n1\n]) w.r.t..\n2\n_.$.   (225)\nBy  (118)  and (119)  andsC\n=\ns\n1\nwe have\nRconnects.\n2\n_.$tos\n1\n(226)\nR$  andRagree on.\n2\n_.$.(227)\nBy  induction  on  (213),  (225),  (216),  (226),  (227),  and  (220)  there  exists$  andv$\nsuch that\ns\n1\n,VE+[x[v$\n1\n],R$|&e$\n2\n\u0014v$,s$(228)\nC(R$,+,v,s$,v$) w.r.t..$.(229)\nHere  (229)  is  one  of  the desired  results. 
Moreover,  by  Rule 13 on  (221)  and (228)\nwe get the desireds,VE,R$|&letx=e$\n1\nine$\n2\nend\u0014v,s$.\n[letrec,  Rule 7].In this  case (115)  takes the form\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend:+,.(230)\nand is  inferred by application of Rule 26 to the premises\nTE+[f[(\\\\\n1\n}}}\\\nk\n=\u0011.{\n\u0014\n,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n(231)\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<(232)\nTE+[f[(_$,p)]|&e\n2\nOe$\n2\n:+,.\n2\n(233)\n.=.\n1\n_.\n2\n,(234)\nwhere\\\u0011=\\\n1\n}}}\\\nk\nand_$=\\:\u0011\\\u0011=\u0011.{. Moreover,  (117) was inferred by Rule 7 on the\npremise\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v.(235)\nSince (231) must have been inferred by Rules 23 and 28, we have.\n1\n=[put(p)].By\n(118) and  (119) we haveR$(p)=R(p) # Dom(s). Letr\n1\n=R(p). Leto\n1\nbe an offset\nwitho\n1\n\u0012Dom(s(r\n1\n)).  Letv\n1\n=(r\n1\n,o\n1\n).  LetVE$=VE+[f[v\n1\n]and  lets\n+\n=\ns+[v\n1\n[(\\\n1\n, ...,\\\nk\n,x,e$\n1\n,VE$,R$)]. By Lemma 5.4  on (231) we have that\nTE+[f[(_$,p)]|&*x.e\n1\nO*x.e$\n1\natp:({,p),.\n1\n.(236)\n160\nTOFTE  AND TALPIN\n\nFile: 643J261353 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3207 Signs:  1935 . Length:   52 pic 10 pts,   222 mm\nLetTE\n+\n=TE+[f[(_$,p)]and  letE\n+\n=E+[f[(x,e\n1\n,E,f)].  By  (120)  we\nhave\nfrv(e$\n1\n) \u001fDomR$_[\\\n1\n, ...,\\\nk\n]7frv(e$\n2\n)\u001fDomR$.(237)\nByLemma 8.6on(116),(232),(236),(119),and(237)wehave\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t.._.$.  
Then by Lemma 8.1 we get\nC(R,TE\n+\n,E\n+\n,s\n+\n,VE$) w.r.t..\n2\n_.$.(238)\nAlso,  by  (118)  and  (119),  we get\nRconnects.\n2\n_.$tos\n+\n(239)\nR$  andRagree on.\n2\n_.$.(240)\nBy  induction  on  (233),  (238),  (235),  (239),  (240),  and  (237)  there  exists$  andv$\nsuch that\ns\n+\n,VE$,R$|&e$\n2\n\u0014v$,s$(241)\nC(R$,+,v,s$,v$) w.r.t..$.(242)\nFrom (241)  and  Rule 14 we get\ns,VE,R$|&letrecf[\\\n1\n, ...,\\\nk\n](x)atp=e$\n1\nine$\n2\nend\u0014v$,s$.(243)\nNow  (242)  and  (243) are the desired results.\nThis concludes the  proof of Theorem 6.1.\n10.  ALGORITHMS\nThe  algorithms  used  for  implementing  the  region  inference  rules  in  the  ML  Kit\nwill not be described here. We shall give a brief overview,  however. First,  ordinary\nML type inference is performed using Milner's algorithm W, extended to all of Core\nML.  The  output  of  this  phase  is  an  explicitly  typed  lambda  term,e\n0\n,  say.  Then\nregion inference  is  done  in  two  phases.  Firste\n0\nis  decorated  with  fresh  region and\neffect  variables  everywhere  a  region  and  effect  variable  will  be  required  in  an\nexplicitly  typed  version  the  fully  region  annotated  target  expression.  This  phase  is\ncalledspreading.  During  spreading,  every  recursive  functionfof type  scheme_\nML\n,\nsay,  is  given  the  most  general  type  scheme_\n0\nwhich  has_\nML\nas  its  projection  (in\nthe sense  of  Section 5.3).  For  example,  aletrec-boundint\u0014intfunction  will\nbe  given  type  scheme\\\\\n1\n\\\n2\n=.(int,\\\n1\n)w\u0014\n=.<\n(int,\\\n2\n).  The  spreading  phase  per-\nforms  the  unifications  suggested  by  the  inference  rules.  For  example,  the  two\noccurrences of+$ in Rule 24 suggest a unification of the types and places of operator\nand  operand.  Spreading  employs  rules  27  and  28  as  aggressively  as  possible  (i.e.,\nafter  every  application  of  rules  22,  24,  25,  and  26).  
The  resulting  program,  call  it\n161\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261354 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3734 Signs:  2828 . Length:   52 pic 10 pts,   222 mm\n$e_1$, is well annotated with regions, except for the fact that the type schemes assumed\nfor recursive functions may be too general, compared to the type schemes that were\ninferred for  the lambda  expressions which define the functions.\nThe second phase is called fixed-point resolution and takes $e_1$ as input. For each\nrecursive  function  in $e_1$,  the  region  inference  steps  (unification,  introduction  of\nletregions,  etc.)  are iterated,  using less and less general type schemes for the recur-\nsive functions, till a fixed point is reached. This is similar in spirit to Mycroft's algo-\nrithms for  full polymorphic  recursion [20].\nIt  is  possible  to  extend  the  notion  of  principal  unifiers  for  types  to  a  notion  of\nprincipal  unifier  for  region-annotated  types,  even  though  region-annotated  types\ncontain  effects.  This  relies  on  invariants  about  arrow  effects  that  were  outlined  in\nSection 5.1. One can prove that every two types $\\tau_1$ and $\\tau_2$ that have the same under-\nlying ML type have a most general unifier,  provided all the arrow effects in $\\tau_1$ and\n$\\tau_2$ satisfy the invariants.\nThe  reason  for  the  separation  of  spreading  and  fixed-point  resolution  is  that,\nunless one takes care, the iteration used to handle the polymorphic region recursion\ndoes not terminate.  In particular,  there is a danger of arrow effects that grow ever\nlarger,  as  more  fresh  region  and  effect  variables  are  generated.  The  division  into\nspreading  and  fixed-point  resolution  solves  this  problem  by  only  generating  fresh\nvariables  during  the  spreading  phase.  It  can  then  be  shown  that  the  second  phase\nalways terminates. 
This approach does not always give principal types, for there are\ncases  where  that  function  in  the  fixed-point  resolution  which  is  responsible  for\nforming  type  schemes  is  refused  the  opportunity  to  quantify  region  and  effect\nvariables even though it is permitted by the inference rules. When this happens, the\nimplementation  simply  prints  a  warning  about  the  possible  loss  of  principal  types\nand  continues  with  a  less-than-principal  type  scheme.  Fortunately,  this  happens\nrather infrequently  in  practice,  and since  the  soundness  result  of the present paper\nshows the  correctness  for all derivations $TE \\vdash e \\Rightarrow e' : \\mu, \\varphi$,  safety is not violated.\n11.  LANGUAGE  EXTENSIONS\nIn  this  section  we  outline  some  of  the  extensions  that  have  been  made  to  the\nregion  inference  rules  in  order  to  handle  references,   exceptions,   and  recursive\ndatatypes in the  ML Kit.\n11.1.  References\nAssume  primitives ref,  !,  and  :=  for  creating  a  reference,  de-referencing,  and\nassignment,  respectively.  For  the  purpose  of  region  inference,  these  can  be  treated\nas variables with the  following type schemes:\n$\\mathtt{ref} : \\forall \\alpha\\,\\rho_1\\,\\rho_2\\,\\epsilon.\\ (\\alpha, \\rho_1) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2)\\}} ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2)$\n$\\mathtt{!} : \\forall \\alpha\\,\\rho_1\\,\\rho_2\\,\\epsilon.\\ ((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) \\xrightarrow{\\epsilon.\\{\\mathbf{get}(\\rho_2)\\}} (\\alpha, \\rho_1)$\n$\\mathtt{:=} : \\forall \\alpha\\,\\rho_1\\,\\rho_2\\,\\rho_3\\,\\rho_4\\,\\epsilon.\\ (((\\alpha, \\rho_1)\\ \\mathtt{ref}, \\rho_2) * (\\alpha, \\rho_1), \\rho_3) \\xrightarrow{\\epsilon.\\{\\mathbf{put}(\\rho_2), \\mathbf{put}(\\rho_4)\\}} (\\mathtt{unit}, \\rho_4)$.\n162\nTOFTE  AND TALPIN\n\nFile: 643J261355 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  3440 Signs:  2772 . Length:   52 pic 10 pts,   222 mm\nThe  most  interesting  of  these  is  assignment.  The  new  contents  of  the  reference  is\nrepresented by a pointer  (or by a word,  if the value is in unboxed  representation).\nThe  assignment  updates  the  reference  with  this  pointer  (or  word).  
Thus  there  is  a\nput effect on the region where the reference resides. The assignment does not make\na  copy  of  the  stored  value.  Thus  assignment  is  a  constant  time  operation,  but  the\ndownside is that the old and the new contents must be in the same regions (see the\ntwo occurrences  of $\\rho_1$ in the type  for := ). Thus,  for values with boxed representa-\ntion,  all  the different  contents  of  the  reference will  be kept alive  for  as long as  the\nreference is live. In ``mostly functional'' programs this does not seem to be a serious\nproblem  and  even  if  there  are  many  side-effects,  one  can  still  expect  reasonable\nmemory   usage   as   long   as   the   references  are   relatively  short-lived.   Long-lived\nreferences that contain boxed values and are assigned freshly created contents often\nare hostile to region inference.\n11.2.  Exceptions\nOur   approach   here   is   simple-minded:   exception   values   are   put   into   global\nregions. Every  evaluation of an  exception declaration gives rise to an allocation in\nsome  global  region.  Application  of  a  unary  exception  constructor  to  an  argument\nforces  the  argument  to  be  in  global  regions  as  well.  Thus  if  one  constructs  many\nexception values using unary exception constructors, one gets a space leak (indeed,\nthe space leaking region $\\rho_{122}$ in Fig. 5 contains constructed exception values). If one\nuses  nullary  constructors  only,  there  is  only  going  to  be  one  allocation  for  each\nevaluation of each exception declaration.\n11.3.  Recursive Datatypes\nSo  far,  every  type  constructor  has  been  paired  with  one  region  variable.  For\nvalues  of  recursive  datatypes,  additional  region  variables,  the  so-called auxiliary\nregion  variables,  are  associated  with  type  constructors.  
For  example,  consider  the\ndeclaration of thelistdatatype:\ndatatype  'a  list=nil|::of  'aV'a list\nTheregion-annotatedversionofthetype:listtakestheform\n(:,\\\n1\n)(list\n[\\\n2\n]\n,\\\n3\n), where\\\n1\nstands for a region which contains the list elements,\n\\\n3\ncontains  the  spine  of  the  list  (i.e.,  the  constructorsniland  : :),  and\\\n2\nis  an\nauxiliary region which contains the pairs, to which : : is applied. Thus lists are kept\n``very boxed'': in region\\\n3\nevery cons cell takes up two words, the first a tag (saying\n``I am cons'') and the second a pointer to the pair to which : : is applied. The region\n\\\n2\nis called auxiliary because it holds values which are internal to the datatype dec-\nlaration;  there  will  be  one  auxiliary  region  for  each  type  constructor  or  product\ntype  formation  in  each  constructor  in  the  datatype.  However,  all  occurrences  of\n163\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261356 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2910 Signs:  1816 . Length:   52 pic 10 pts,   222 mm\nthe  type  constructor  being  declared  are  put  in  the  same  region.  Hence  : :  receives\ntype\n\\\\\n1\n\\\n2\n\\\n3\n:.((:,\\\n1\n)V((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n),\\\n2\n)wwww\u0014\n=.[put(\\\n3\n)]\n((:,\\\n1\n)list\n[\\\n2\n]\n,\\\n3\n).\nSequential datatype declarations pose an interesting design problem:\ndatatype t1=C of int\ndatatype t2=C of t1Vt1\ndatatype t3=C of t2Vt2\n}}}\ndatatypet\ni\n=Coft\ni&1\nVt\ni&1\n}}}\nIn  the  declaration  oft\ni\n,  should  one  give  the  two occurrences  oft\ni&1\non  the  right-\nhand  side  the  same  or  different  regions?  If  one  gives  them  the  same  regions,  one\nintroduces  unnecessary  sharing;  if  one  gives  them  different  regions,  the  number  of\nauxiliary  region  variables  grows  exponentially  ini,  potentially  leading  to  slow\nregion  inference.  
A  third  possibility  is  to  put  a  limit  on  the  number  of  auxiliary\nregion variables one will allow. We have chosen the third solution, but a systematic\nempirical study  of different  solutions has not been conducted.\n12.  STRENGTHS AND WEAKNESSES\nThe region inference rules were first implemented in a prototype system [26] and\nthen  in  the  ML  Kit  [5].  Neither  of  these  systems  uses  garbage  collection.  This\nsection  records  some  of  the  experience  gained  from  these  systems,  with  special\nemphasis  on  how  details  of  the  region  inference  rules  influence  memory  manage-\nment.  We  first  illustrate  consequences  of  the  region  inference  rules  by  a  series  of\nsmall,  but  complete,  examples.  Then  we  report  a  few  results  from  larger  bench-\nmarks run on the ML Kit. Throughout, we use Standard ML syntax [19]; roughly,\nfun is translated  into letrec and val into let.\n12.1.  Small Examples\nThe  examples  are  grouped  according  to  the  general  point  they  are  intended  to\nmake.\n12.1.1. Polymorphic Recursion\nGenerally  speaking,  polymorphic  region  recursion  favours  recursive  functions\nthat  have  a  balanced  call  tree  (as  opposed  to  an  iterative  computation,  where  the\n164\nTOFTE  AND TALPIN\n\nFile: 643J261357 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes:  2376 Signs:  1439 . Length:   52 pic 10 pts,   222 mm\ncall tree is a list). We illustrate this with two examples. The first is the exponential\nversion of  the Fibonacci function:\nfun fib n=if n<=1 then 1 else fib(n-2)+fib(n-1)\nval fib15=fib 15;\nDue  to  region  polymorphism,  the  two  recursive  calls  of fib use  different  regions,\nlocal to the body  (see  Fig. 2).  The memory  usage  appears in Fig. 
4.\nThe next example, calledreynolds2[5], is a depth-first search in a tree, using\na predicate to  record  the path  from the root  to the present node:\ndatatype 'a tree=\nLf\n|Brof'aV'a treeV'a tree\nfun mk\n&\ntree 0=Lf\n|mk\n&\ntree n=let val t=mk\n&\ntree(n&1)\nin Br(n, t, t)\nend\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif p (x) then true\nelse search (fn yOy=x orelse p (y)) t1\norelse\nsearch (fn yOy=x orelse p y) t2\nval reynolds2=search (fn\n&\nOfalse) (mk\n&\ntree 20)\nDue  to  the  polymorphic  recursion,  the  recursive  call  ofsearchdoes  not  put  the\nclosures  for(fn  yOy=x  orelse  p  (y))in  the  same  region  asp,  so  the  space\nusage  will  be  proportional  to  the  depth  of  the  tree.  This  leads  to  good  memory\nutilisation (Fig. 4).\nFIG.  4.Memory  used  in  running  sample  programs  on  the  ML  Kit  with  Regions,  Version  29a3:\n(i)  maximal  space  (in bytes)  used  for  variable  size  regions  (one  region page  is 800  bytes); (ii) maximal\nspace (in bytes) used for fixed size regions; (iii) maximal stack size during execution (in bytes); (iv) num-\nber  of  bytes  holding  values  at  the  end  of  the  computation  (regions  on  stack+data  in  variable  sized\nregions).\n165REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261358 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2801 Signs:  1913 . Length:   52 pic 10 pts,   222 mm\nBy contrast, consider the first-order variant, calledreynolds3, which uses a list\nto   represent   the   path.   
It   is   obtained   by   replacing   thesearchfunction   of\nreynolds2by\nfun member(x, [])=false\n| member(x, x' : : rest)=\nx=x' orelse member(x, rest)\nfun search p Lf=false\n| search p (Br(x, t1, t2))=\nif member(x, p) then true\nelse search (x : : p) t1 orelse\nsearch (x : :  p) t2\nval reynolds3=search [] (mk\n&\ntree 20)\nAs  we  saw  in  Section 11,  region  inference  does  not  distinguish  between  a  list  and\nits tail,  so all cons cells (one for each node in the tree) are put in the same region.\nThis   gives   poor   memory   utilisation,   the   difference   fromreynolds2being\nexponential in  the  depth  of  the tree  (Fig. 4).  More  generally,  in connection  with a\nrecursive datatype, one should not count on polymorphic recursion to separate the\nlife-times  of  a  valuevof  that  type  and  other  values  of  the  same  type  contained\ninv.\n12.1.2.Tail Recursion\nAnother  common  pattern  of  computation  is  iteration.  This  is  best  implemented\nusing a  recursive function  whose type scheme takes the form\\:\u0011\\\u0011=\u0011.(+w\u0014\n=..\n+)  (note\nthat  the  argument  and  result  types  are  the  same,  even  after  region  annotation).\nSuch a function is called aregion endomorphism. Here is how to write a simple loop\nto sum the numbers 1 to 100:\nfun sum(p as (acc, 0))=p\n| sum(acc, n)=sum(n+acc, n&1)\nval sumit=*1(sum(0, 100));\nIn  ML,  all  functions  in  principle  take  one  argument,  in  this  case  a  tuple,  and\nthat  is  how  it  is  implemented  in  the  ML  Kit.  One  might  think  that  100  pairs\nwould pile  up in one  region; however,  an analysis called thestorage mode analysis\n[5]  discovers  that  the  region  can  be  reset  just  before  each  pair  is  written,  so\nthat in fact the region will only ever contain one pair. Memory usage is independent\nof  the  number  of  iterations,  in  this  example.  
By  contrast,  the  non-tail-recursive\nversion\nfun sum' 0=0\n| sum' n=n+sum'(n&1)\nval sum'it=sum' 100\nuses stack space proportional to the number of iterations.\n166\nTOFTE  AND TALPIN\n\nFile: 643J261359 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2127 Signs:  1458 . Length:   52 pic 10 pts,   222 mm\nThe next program,appel1,  is  a variant  of a program in [2]:\nfun s(0)=nil\n| s(i)=0 : : s(i&1)\nfun length []=0\n| length(x : : xs)=1+length xs\nval N=100\nfun f(n,x)=\nlet val z=length x\nin if n=0 then 0 else f(n&1, s N)\nend\nval appel1=f(N, nil)\nHeref(n, nil) uses space3(N\n2\n), although3(N) should be enough. The problem\nis that at each iteration a list of lengthNis created, put in a fresh region, and then\npassed  to  the  recursive  call,  which  only  uses  the  list  to  computez.  The  list,\nhowever,  stays live till the end of the recursive call: Rule 23 and 27 tell us that the\n*-boundxwill  be  allocated  throughout  the  evaluation  of  the  body  off. The cure\nin this  case is  not to use the polymorphic  recursion:\nfun f(p as (n, x))=\nlet val z = length x\nin if n = 0 then 0 else f(if true then (n&1, s N) else p)\nend\nval appel2 = f(N, nil)\nNow  the  storage  mode  analysis  will  discover  that  the  region  containing  the  entire\nlist   can   be   reset   at  each   iteration;  this  is   tail   call  optimisation  for   recursive\ndatatypes!  The  above  transformation  is  a  rather  indirect  way  of  instructing  the\nregion inference algorithm that one does not want polymorphic recursion and if the\noptimiser  eliminated  the  conditional,  it  would  not  even  have  the  desired  effect.  It\nwould  probably  be  better  to  allow  programmers  to  state  their  intentions  directly.\nMemory consumption is in Fig. 7.\n12.1.3.Higher-Order  Functions\nIf  a  functionfis  lambda-bound,  it  is  not  region-polymorphic  (Rule 23).  
For\nexample,  consider\n167\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261360 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2966 Signs:  1959 . Length:   52 pic 10 pts,   222 mm\nfun foldl f acc [] = acc\n| foldl f acc (x : : xs) = foldl f (f(acc, x)) xs\nfun concat list = foldl (op^)\"\"list\nfun blanks 0=[]\n| blanks n =\"\"::blanks(n&1)\nval N = 100\nval string1 = concat(blanks N)\nDespite the fact thatfoldlis region-polymorphic, the lambda-boundfis not,  so\nall applications of the concatenation operator  ^  inconcatwill put their results in\nthe  same  region,  leading  to3(N\n2\n)  space  usage.  To  obtain3(N)  space  usage,  one\nspecializesfoldlto   ^ ,  uncurries  the resulting function,  and turns it into a region\nendomorphism:\nfun concat' (p as (acc, [])) = p\n|concat'(acc, (x : : xs)) = concat'(acc^x, xs)\nfun concat(list) =*1(concat'(\"\",list))\nfun blanks 0 = []\n| blanks n =\"\"::blanks(n&1)\nval string2 = concat(blanks 100)\n12.2.  Larger Benchmarks\nA  number  of  benchmarks  from  the  New  Jersey  Standard  ML  benchmark  suite\nhave  been  ported  to  the  Kit  and  compared  (space  and  time  usage)  against  execu-\ntion  as  stand-alone  programs  under  Standard  ML  of  New  Jersey,  version 93.  The\nlargest  benchmark  is  Simple  (1148  lines),  a  program  which  originally  used  arrays\nof floating  point  numbers  extensively.  To  make  it  run  on  the  Kit  (which  does not\nsupport   arrays)   arrays   were   translated   into   lists   of   references,   so   the   ported\nprogram  is  probably  not  indicative  of  how  one  would  write  the  program  without\narrays  to  start  with.  Life  (252  lines)  uses  lists  very  extensively;  Mandelbrot  (170\nlines)  uses  floating  points  extensively;  Knuth-Bendix  (752  lines)  does  extensive\ndynamic allocation of  data structures that represent terms.\nInitially, programs often use more space when running on the Kit; for example,\nFig. 
5  shows  a  region  profile  for  the  original  version  of  the  Knuth-Bendix  bench-\nmark,  produced  using  Hallenberg's  region  profiler  [10a].  The  region  profiler  can\nalso pinpoint the program points which are responsible for space leaks. The source\nprogram  is  then  changed,  to  make  it  more  region  friendly.  Interestingly,  transfor-\nmations  that  are  good  for  region  inference  often  are  good  for  SML/NJ  too  (see\n168\nTOFTE  AND TALPIN\n\nFile: 643J261361 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:   971 Signs:    437 . Length:   52 pic 10 pts,   222 mm\nFIG.  5. Region profile for Knuth-Bendix before optimisations. One region ($\\rho_{122}$) of unbounded size,\nindicated  as r122inf in  the  picture,  is  responsible  for  most  of  the  space  leak.  Additional  profiling\nreveals that a single program point (the application of an exception constructor to a constant string) is\nresponsible for  all  values  in  that region.\nFIG.  6. Region profile  for Knuth-Bendix after optimizations.\n169\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261362 . By:XX . Date:20:02:97 . Time:10:31 LOP8M. V8.0. Page 01:01\nCodes:  2766 Signs:  2126 . Length:   52 pic 10 pts,   222 mm\nFIG.  7. Comparison  between  stand-alone  programs  created  with  the  ML  Kit  (using  the  HP  PA-\nRISC code generator) and SML of New Jersey, respectively. Here ``orig'' means original program, while\n``impr''  means  improved  for  region  inference.  All  times  are  user  time  in  seconds  on  an  HP  9000  s700,\nmeasured  using  the  unix time command.  Space  is  maximal  resident  memory  in  kilobytes,  measured\nwith top,  and  includes code  and  runtime  system. All  values  are average  over  three runs.\nKnuth-Bendix in Fig. 7 for an example). 
This is not very surprising: when the static\nanalysis is able to infer shorter lifetimes, it may well be because the values actually\nneed to be live for a shorter time,  and this is good for garbage collection too. The\nregion  profile  of  the  improved  Knuth-Bendix  completion  is  shown  in  Fig. 6;  see\nFig. 7 for  a comparison with SML of New Jersey,  version 93.\n12.3.  Automatic Program Transformation\nApart from functions that are deliberately written as region endomorphisms,  the\ngeneral rule is that the more regions are separated, the better (since it makes more\naggressive  recycling  of  memory  possible).  The  Kit  performs  optimisations  which\nseparate  regions.  These  include  replacing $\\mathtt{let}\\ x = e_1\\ \\mathtt{in}\\ e_2\\ \\mathtt{end}$ by $e_2[e_1/x]$ in\ncases  where $e_1$ is  a  syntactic  value  and  either $x$ occurs  at  most  once  in $e_2$ or  the\nvalue denoted  by $e_1$ is not larger than some given constant. Another optimisation,\nwhich  is  implemented,  is  specialisation  of  curried  functions,  as  in  the string2\nexample  above;  however,  the  Kit  does  not  attempt  to  turn  functions  into  region\nendomorphisms  (which  was  the  last  thing  we  did  in string2).  As  a  matter\nof  principle,  the  Kit  avoids  optimisations  which  can  lead  to  increased  memory\nusage.\nAlso useful is the ability of the region inference to suggest where space leaks may\nbe expected. If a function has compound type scheme\n$\\forall \\vec{\\rho}\\,\\vec{\\alpha}\\,\\vec{\\epsilon}.\\ \\mu_1 \\xrightarrow{\\epsilon.\\varphi} \\mu_2$\nand $\\varphi$ contains  an  atomic  effect  of  the  form $\\mathbf{put}(\\rho)$,  where $\\rho$ is  not  amongst  the\nbound region variables $\\vec{\\rho}$, then one quite possibly has a space leak: every call of the\nfunction might put a value into some region which is external to the function. If in\n170\nTOFTE  AND TALPIN\n\nFile: 643J261363 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2876 Signs:  2273 . 
Length:   52 pic 10 pts,   222 mm\naddition $\\rho$ does not occur free in $\\mu_2$, that is all the more reason for concern, for the\nvalue will not even be part of the result of the function. In other words, the function\nhas  a  side-effect  at  the  implementation  level.  This  can  easily  happen  even  when\nthere are no side-effects  in  the source program.\nIn  such  cases,  the  implementation  simply  issues  a short warning.  This turns  out\nto be very  useful in practice.\nAnother  usage  of  the  inferred  information  is  the  ability  to  detect  dead  code.\nConsider the rule for letregion (Rule 27). If $\\mathbf{put}(\\rho) \\in \\varphi$ and $\\mathbf{get}(\\rho) \\notin \\varphi$ then what-\never  value  that  was  put  into $\\rho$ was  never  used.  For  example,  this  can  detect  that\nthe functions f and g below are never used:\nlet\nfun f(x) = x+1\nfun g(x) = f(f(x))\nin\n(fn x => 3)(fn () => g 5)\nend\n12.4.  Conclusion\nAs  has  been  shown  with  the  previous  examples,  it  is  not  the  case  that  every\nML  program  automatically  runs  well  on  a  stack  of  regions.  Often,  one  has  to\nprogram  in  a  region-friendly  style,  aided  by  profiling  tools  to  find  space  leaks.\nThus,  programming  with  regions  is  different  from  usual  ML  programming,  where\none  relies  on  a  garbage  collector  for  memory  management.  On  the  other  hand,\nthe   region   discipline   offers   what   we   feel   is   an   attractive   combination   of   the\nconvenience  of  an  expressive  programming  language  and  the  ability  to  reason\nabout  the  time  and  space  performance  of  programs.  
The  relationship  between\nthe   abstract   model   of   the   regions   presented   in   this   paper   and   the   concrete\nimplementation  is  close  enough  that  one  can  use  the  abstract  model\u0015\u0015combined\nwith the profiling tools mentioned earlier\u0015\u0015to tune programs, often resulting in very\nspace  efficient  programs  that  are  executed  as  written,  with  no  added  costs  of\nunbounded size.\nAPPENDIX A:  EXAMPLE THREE\u0015ADDRESS  CODE\nThe three-address code which the ML Kit produces on the way to HP PA-RISC\ncode for the example given in Section 1 is shown below. Temporary variables start\nwithV. Fixed registers are used for the stack pointer (SP) and for function call and\nreturn  (stdArg,  stdClos,  stdRes).  In  this  example,  the  compiler  discovers\nthat  all  regions  can  be  represented  on  the  stack;  in  other  cases,letregionand\nendtranslate  into  calls  of  runtime  system  procedures  that  resemble  lightweight\nmallocandfreeoperations.\n171\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261364 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2884 Signs:  1521 . Length:   53 pic 11 pts,   227 mm\nLABEL  1: (VmainV)\n}}}\nAllocRegion(V43); (Vallocate  global  region  rho1V)\n}}}\n(Vbegin  LETREGION  [rho4, rho5]V)\nMove(SP, V46);\n(VV46 :=SP, i.e.  
rho4V)\nOffset(SP, 12, SP);\nMove(SP, V47);\n(Vrho5V)\nOffset(SP, 12, SP);\n(Vbegin APP --- non tail callV)\n(Vbegin operatorV)\n(Vbegin LETREGION (rho6 eliminated)V)\n(Vbegin LETV)\n(Vbegin RECORDV)\nMove(V47, V54);\n(Vallocate storage for recordV)\nMove(5, V55);\n(V5 represents 2V)\nStoreIndexL(V55, V54, 1);\n(Vstore component of recordV)\nMove(7, V55);\n(V7 represents 3V)\nStoreIndexL(V55, V54, 2);\n(Vstore component of recordV)\nStoreIndexL(20, V54, 0);\n(VtagV)\nMove(V54, V51);\n(Vsave address of record as resultV)\n(Vend of RECORDV)\n(*LET scope:V)\nMove(V46, V52); (Vallocate  storage  for closure  for FN  yO}}}V)\nStoreIndexL(Lab5, V52, 0);\n(Vstore  code  pointer  in closureV)\nMove(V51, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  x in  closureV)\nFetchVars(V43);\nMove(V43, V53);\nStoreIndexL(V53, V52, 2);\n(Vsave  free  variable  rho1  in closureV)\nMove(V52, V48);\n(Vsave  address  of closure  as resultV)\n(Vend LETV)\n(Vend LETREGION  (rho6  eliminated)V)\n(Vend operator, begin  operandV)\nMove(11, V49);\n(V11 represents 5V)\n(Vend operandV)\nPush(Lab4);\n(Vpush  return  addressV)\nMove(V48, stdClos);\nMove(V49, stdArg);\nFetchIndexL(stdClos, 0, V50); (Vfetch code address from closureV)\nJmp(V50)\nLABEL  4:\n(Vreturn  addressV)\nMove(stdRes, V45);\n(Vend APPV);\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION  rho5V)\nOffset(SP,\nt\n12, SP);\n(Vend LETREGION rho4V)\nHALT\nLABEL  5:\n(Vcode  for  function  FN yO}}}V)\n(Vbegin  RECORDV)\nFetchVars(V43)\nMove(V43, V57);\nAllocMeml(V57, 3, V57);(Vallocate  storage  for  record  at rho1V)\nFetchIndexL(stdClos, 1, V59);(Vaccess variable: xV)\n172TOFTE  AND TALPIN\n..\n\nFile: 643J261365 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2837 Signs:  1972 . 
Length:   52 pic 10 pts,   222 mm\nFetchIndexL(V59, 1, V58);(Vextract  component  0 from  record.V)\nStoreIndexL(V58, V57, 1);(Vstore  component  of  recordV)\nMove(stdArg, V58);(Vaccess  variable: yV)\nStoreIndexL(V58, V57, 2);(Vstore  component  of  recordV)\nStoreIndexL(20, V57, 0);(VtagV)\nMore(V57, stdRes);(Vsave  address  of record  as resultV)\n(Vend of  RECORDV)\n(Vreturn:V)\nPop(V56);\nJmp(V56)\nAPPENDIX B:  NOMENCLATURE\nThe  index  refers  to  sections  where\ntheconceptsare    introduced.    For\nexample,  the  entry  ``region  name  (r#\nRegName)  2,  Fig. 1,  4.1''  means  that\nthe  notion  of   region  name   is  intro-\nduced in Sections 2 and 4.1, appears in\nFig. 1  and  that  meta-variablerranges\nover   region   names   throughout   the\npaper.\n[  ]  (region arguments)  2,  4\n\\(in type schemes)  3.2,  5.1\n+  (modification  of  finite  maps)  3.1,\n4.1\na(restriction  of finite map) 3.1\n\"\"(restriction of store) 4.1\nAw\u0014\nfin\nB(finite maps) 3.1\n_\nML\n\u001e{\nML\n(see instance)\n*(function abstraction) 3\n:(see type variable)\n:\u0011(sequence of type variables)  5.1\n#(see  claim of consistency)\n1(set of claims) 7\n1\n*\n(maximal fixed  point  ofF)7\n=(see  effect variable)\n=\u0011(sequence of effect variables)  5.1\n=..(see  arrow effect)\n\\(see region variable)\n\\\u0011(sequence of region variables)  5.1\n{(type)  5.1\n_(type scheme)  5.1\n{\nML\n(ML type)  3.2\n_\nML\n(ML type scheme)  3.2\n(x,e,E),(x,e,E,f),(x,e$,VE,R)\nor(\\\n1\n}}}\\\nk\n,x,e,VE,R)(see\nclosure)\nTE\nML\n|&e:{\nML\n(type  rules  for source)\n3.2\nE|&e\u0014v(evaluation of source expres-\nsions)  3.3\ns,VE,R|&e\u0014v,s$   (evaluation   of\ntarget expression)  4.1\nTE|&eOe$:+,.(region    inference\nrules)  5.2\nAddr (see address)\naddress (aor (r,o) # Addr=RegName_\nOff Set) 4.1\nagreement    between   region    environ-\nments 6\narrow effect (=..)  
5.1\nat(allocation  directive) 1,  4\nbv  (bound  variables  of  type  scheme)\n5.1\nc(see  integer constant)\nC(domain  for consistency)  7\nC6,  7\nco-induction 7\nclaim of consistency(#)7\nclosure (in dynamic semantics)\nsourcelanguage((x,e,E)or\n(x,e,E,f))  3.3\ntarget   language   ((x,e$,VE,R)or\n(\\\n1\n}}}\\\nk\n,x,e,VE,R))  4.1\nconnecting an effect to a store 6\n173\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261366 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  2819 Signs:  2207 . Length:   52 pic 10 pts,   222 mm\nconsistency 6\nDom (domain  of finite map) 3.1\nE(see environment)\nEffect Figure 3\nEffectVar  (see effect variable)\neffect (.)  5.1\nvariable  (=)  5.1\natomic  (')  5.1\neffect substitution  (S\ne\n) 5.1\nEnv (see environment)\nenvironment   (see   also   type   environ-\nment and region environment)\nin   dynamic   semantics   for   source\n(E# Env=Varw\u0014\nfin\nVal) 3.3\nin dynamic semantics of target (VE#\nTargetEnv=Varw\u0014\nfin\nAddr)  4.1\nequivalence of  type schemes 5.1\nf(see  program variable)\nF(monotonic   operator   on   sets   of\nclaims) 7\nfev  (free  effect variables) 5.1\nfpv (free program  variables) 4.6\nfrv  (free  region variables)  4.6,  5.1\nftv (free type variables) 5.1\nfv(freetype,regionandeffect\nvariables)  5.1\nget(get effect)  5.1\ninstance\nin source language  (_\nML\n\u001e{) 3.2\nin target language  (_\u001e{) 5.1\ninteger constant  (c)3\nletregion1,  4\no(see offset)\nof(projection)  3.1\noffset (o)  4.1\np(see region variable)\nP(powerset  constructor)  7\nplanar domain  of a store  (Pdom) 4.1\npolymorphism\nin types 3.2,  5.1\nin regions 2,  4.3,  5.1,  5.2,  10\nin effects 5.1,  5.2,  10\nvalue restriction  2,  3.2,  5.2\nprogram variable  (xorf)3\nput(put effect)  5.1\nr(see  region name)\nR(see  region environment)\nRegEnv  (see  region environment)\nRegName  (see region name)\nRegion=Off Setw\u0014\nfin\nStoreVal (see also\nregion) 4.1\nregion  (see  also Region) 1,  
4.1\nregion allocation 8.4\nregionenvironment(R# RegEnv=\nRegVarw\u0014\nfin\nRegName)  4.1\nregion  function  closure  ((\\\n1\n}}}\\\nk\n,x,\ne,VE,R))  (see closure)\nregion  name  (r# RegName)  2,  Fig. 1,\n4.1\nregion renaming 8.3\nregion substitution  (S\nr\n)  5.1\nregion variable  (\\orp)1,4\nRng  (range  of finite map)  3.1\nSExp (source language) 3\nTE(type environment)  5.1\nTE\nML\n(ML type environment) 3.2\nTExp  (target language)  4\ns(see  store)\ns(a)  4.1\nS(see substitution)\nS\ne\n(see effect substitution)\nS\nr\n(see  region substitution)\nS\nt\n(see  type substitution)\nStore  (see store)\nstore (s# Store=RegNamew\u0014\nfin\nRegion)\n4.1\nStoreVal  (see value,  storable)\nsubstitution  (S) 5.1\nsupport (Supp)  5.1\nsv(see value,  storable)\nTargetEnv  (see environment)\nTargetVal (see value)\nTyVar (see type variable)\ntype ({) 5.1\ntype with place (+# TypeWithPlace =\nType_RegVar)  5.1,  Fig. 3\nTypeWithPlace  (see  type with place)\ntype  environment  (TE# Varw\u0014\nfin\nType\nScheme_RegVar)  5.1\nTypeScheme  Fig. 3\ntype scheme  (_)  5.1\ntype substitution  (S\nt\n)  5.1\n174\nTOFTE  AND TALPIN\n\nFile: 643J261367 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes:  6098 Signs:  3417 . Length:   52 pic 10 pts,   222 mm\ntype variable (:) 3.2,  5.1\ntype with place  (+) 5.1\nVal  (see value)\nvalue\nsource language  (v# Val)  3.3\nstorable  (sv# StoreVal)  4.1\ntarget language (vora# TargetVal=\nAddr) 4.1\nVE(see environment)\ntarget language  (v$)\nx(see program variable)\nyield (Yield)  8.3\nACKNOWLEDGMENTS\nIt  would  have  been  impossible  to  assess  the  practical  use  of  the  region  inference  rules  without  the\nsoftware  developed  by  the  ML  Kit  with  the  Regions  development  team.  Lars  Birkedal wrote  the  com-\npiler  from  region-annotated  lambda-terms  to  C,  together  with  a  runtime  system  in  C.  
Martin  Elsman\nand Niels Hallenberg extended this work to HP PA-RISC code generation, including register allocation\nand  instruction  scheduling.  Magnus  Vejlstrup  developed  the  multiplicity  inference  for  inferring  region\nsizes.  Niels  Hallenberg  implemented  the  region  profiler.  Peter  Sestoft  and  Peter  Bertelsen  conducted\nthorough  tests  of  the  system  and  improved  the  storage  mode  analysis.  The  first  author  thanks  Mikkel\nThorup and Bob Paige for generously providing algorithmic expertise,  specifically on graph algorithms;\ntheir input was very  important for the detailed design and implementation of the region inference algo-\nrithms  in  the  Kit.  The  depth-first  search  algorithms  in  Section12.1  were  suggested  by  John  Reynolds.\nFinally,  we  thank  the referees for  many constructive suggestions and comments.\nReceived May 17,  1995; final manuscript  received September  4,  1996\nREFERENCES\n1.  Aiken,  A.,  Fa\u0018 hndrich,  M.,  and  Levein,  R.,  Better  static  memory  management:  Improving  region-\nbased analysis of higher-order languages,in``Proceedings of the ACM SIGPLAN '95 Conference on\nProgramming  Languages  and  Implementation  (PLDI),  La  Jolla,  CA,  June  1995,''  pp. 174\u00151850,\nACM Press.\n2.  Appel,  A.  W. (1992),  ``Compiling with Continuations,'' Cambridge Univ. Press,  Cambridge,  UK.\n3.  Baker,  H.  (1978),  List  processing in  real  time on  a serial computer,Comm.ACM21,  280\u0015294.\n4.  Baker,  H. G., Unify and conquer (garbage collection, updating, aliasing, ...) in functional languages,\nin``Proceedings  of  the  1990  ACM  Conference  on  Lisp  and  Functional  Programming,  June  1990,''\npp. 218\u0015226.\n5.  Birkedal, L., Tofte, M., and Vejlstrup, M. (1996), From region inference to von Neumann machines\nvia  region  representation  inference,in``Proceedings  of  the  23rd  ACM  SIGPLAN\u0015SIGACT  Sym-\nposium on  Principles  of Programming Languages,'' pp. 
171\u0015183,  ACM Press.\n6.  Gifford, J. M. L. D. K., Jouvelot, P., and Sheldon, M. (1987), ``Fx-87 Reference Manual,'' Technical\nReport MIT\u0012LCS\u0012TR-407,  MIT Laboratory  for Computer Science.\n7.  Damas,  L., and Milner,  R. (1982), Principal type schemes for functional programs,in``Proceedings,\n9th Annual  ACM  Symposium  on Principles  of Programming Languages,'' pp. 207\u0015212.\n8.  Dijkstra, E. W. (1960), Recursive programming,Numer.Math2, 312\u0015318; also in Rosen, ``Program-\nming Systems  and Languages,'' McGraw\u0015Hill,  1967.\n9.  Elsman,  M.,  and Hallenberg,  N. (1995),  ``An Optimizing Backend for the ML Kit Using a Stack of\nRegions,''  Student  Project  95-7-8,  Department  of  Computer  Science,  University  of  Copenhagen\n(DIKU).\n10.  Georgeff,  M.  (1984),  Transformations  and reduction strategies for  typed lambda expressions,ACM\nTrans.Programming Languages Systems6,  603\u0015631.\n10a.  Hallenberg,  N.,  A  region  profiler  for  a  standard  ML  compiler  based  on  region  inference,  student\nproject 96-5-7, Department of Computer Science, University of Copenhagen (DIKU), June 14, 1996.\n175REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261368 . By:CV . Date:20:03:97 . Time:13:04 LOP8M. V8.0. Page 01:01\nCodes:  6916 Signs:  3068 . Length:   52 pic 10 pts,   222 mm\n11.  Hudak, P., A semantic model of reference counting and its abstraction,in``ACM Symposium on List\nand Functional  Programming,  1986,'' pp. 351\u0015363.\n12.  Jouvelot,  P.,  and  Gifford,  D.,  Algebraic  reconstruction  of  types  and  effects,in``Proceedings  of  the\n18th ACM  Symposium  on Principles  of Programming Languages  (POPL),  1991.''\n13.  Katsuro  Inoue,  H.  S.,  and  Yagi,  H.  (1988),  Analysis  of  functional  programs  to  detect  run-time\ngarbage  cells,ACM Trans.Programming Languages Systems10,  555\u0015578.\n14.  Knuth,   D.  E.  (1972),  ``Fundamental  Algorithms,''  The  art  of  Computer  Programming,   Vol. 
1,\nAddison\u0015Wesley,  Reading,  MA.\n15.  Lieberman, H., and Hewitt, C. (1983), A real-time garbage collector based on the lifetimes of objects,\nComm.ACM26,  419\u0015429.\n16.  Lucassen,  J.,  and  Gifford,  D.,  Polymorphic  effect  systems,in``Proceedings  of  the 1988  ACM  Con-\nference on  Principle  of  Programming  Languages,  1988.''\n17.  Lucassen,  J.  M.  (1987),  ``Types  and  Effects,  towards  the  Integration  of  Functional  and  Imperative\nProgramming,''  Ph.D. thesis,  MIT  Laboratory for  Computer Science; MIT\u0012LCS\u0012TR-408.\n18.  Milner,  R.  (1978),  A  theory  of  type  polymorphism  in  programming,J.Comput.System  Sci.17,\n348\u0015375.\n19.  Milner,  R.,  Tofte,  M.,  and  Harper,  R.  (1990),  ``The  Definition  of  StandardML,''  MIT  Press,\nCambridge,  MA.\n20.  Mycroft,  A. (1984),  Polymorphic type schemes and recursive definitions,in``Proceedings, 6th Inter-\nnational  Conference  on  Programming,''  Lecture  Notes  in  Computer  Science,  Vol. 167,  Springer-\nVerlag,  Berlin\u0012New York.\n21.  Naur,  Peter  (Ed.)  (1963),  Revised  report  on  the  algorithmic  language  Algol  60,Comm.ACM1,\n1\u001517.\n21a.  Nielson,  H.  R.,  and  Nielson,  F.,  Higher-order  concurrent  programs  with  finite  communication\ntopology,in``Conference  Record  of  POPL'94:  21 st  ACM  SIGPLAN\u0015SIGACT  Symposium  on\nPrinciples of  Programming Languages,'' pp. 84\u001597,  Assoc. Comput. Mach.  Press,  Jan. 1994.\n22.  Ruggieri,  C.,  and  Murtagh,  T.  P.  (1988),  Lifetime  analysis  of  dynamically  allocated  objects,in\n``Proceedings  of  the  15th  Annual  ACM  Symposium  on  Principles  of  Programming  Languages,''\npp. 285\u0015293.\n23.  Talpin, J.-P. (1993), ``Theoretical and Practical Aspects of Type and Effect Inference,'' Doctoral Dis-\nsertation;  also  available  as Research  Report  EMP\u0012CRI\u0012A-236,  Ecole  des Mines de Paris.\n24.  Talpin,  J.-P.,  and  Jouvelot,  P.  
(1992),  Polymorphic  type,  region  and  effect  inference,J.Funct.\nProgramming2.\n25.  Tofte,  M.,  and  Talpin,  J.-P.  (1993),  ``A  Theory  of  Stack  Allocation  in  Polymorphically  Typed\nLanguages,'' Technical Report DIKU-report 93\u001215,  Department of Computer Science, University of\nCopenhagen.\n26.  Tofte, M., and Talpin, J.-P. (1994), Implementing the call-by-value lambda-calculus using a stack of\nregions,in``Proceedings   of   the   21st   ACM   SIGPLAN\u0015SIGACT   Symposium   on   Principles   of\nProgramming Languages,'' pp. 188\u0015201,  ACM Press.\n27.  Turner,  D. N.,  Wadler,  P.,  and Mossin,  C.,  Once upon a type,in``Conference Record of FPCA'95,\nSIGPLAN\u0015SIGARCH\u0015WG2.8  Conference  on  Functional  Programming  Languages  and  Computer\nArchitecture,''  pp. 1\u001511,  Assoc. Comput. Mach.  Press,  June 1995.\n176TOFTE  AND TALPIN",
+    "dataFromCrossref": {
+      "indexed": {
+        "date-parts": [
+          [
+            2024,
+            1,
+            31
+          ]
+        ],
+        "date-time": "2024-01-31T16:34:41Z",
+        "timestamp": 1706718881300
+      },
+      "reference-count": 31,
+      "publisher": "Elsevier BV",
+      "issue": "2",
+      "license": [
+        {
+          "start": {
+            "date-parts": [
+              [
+                1997,
+                2,
+                1
+              ]
+            ],
+            "date-time": "1997-02-01T00:00:00Z",
+            "timestamp": 854755200000
+          },
+          "content-version": "tdm",
+          "delay-in-days": 0,
+          "URL": "https://www.elsevier.com/tdm/userlicense/1.0/"
+        },
+        {
+          "start": {
+            "date-parts": [
+              [
+                2013,
+                7,
+                17
+              ]
+            ],
+            "date-time": "2013-07-17T00:00:00Z",
+            "timestamp": 1374019200000
+          },
+          "content-version": "vor",
+          "delay-in-days": 6010,
+          "URL": "https://www.elsevier.com/open-access/userlicense/1.0/"
+        }
+      ],
+      "content-domain": {
+        "domain": [],
+        "crossmark-restriction": false
+      },
+      "published-print": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "DOI": "10.1006/inco.1996.2613",
+      "type": "journal-article",
+      "created": {
+        "date-parts": [
+          [
+            2002,
+            10,
+            6
+          ]
+        ],
+        "date-time": "2002-10-06T17:10:40Z",
+        "timestamp": 1033924240000
+      },
+      "page": "109-176",
+      "source": "Crossref",
+      "is-referenced-by-count": 384,
+      "title": "Region-Based Memory Management",
+      "prefix": "10.1006",
+      "volume": "132",
+      "author": [
+        {
+          "given": "Mads",
+          "family": "Tofte",
+          "sequence": "first",
+          "affiliation": []
+        },
+        {
+          "given": "Jean-Pierre",
+          "family": "Talpin",
+          "sequence": "additional",
+          "affiliation": []
+        }
+      ],
+      "member": "78",
+      "reference": [
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF1",
+          "doi-asserted-by": "crossref",
+          "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press",
+          "DOI": "10.1145/207110.207137"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF2",
+          "series-title": "Compiling with Continuations",
+          "author": "Appel",
+          "year": "1992"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF3",
+          "doi-asserted-by": "crossref",
+          "first-page": "280",
+          "DOI": "10.1145/359460.359470",
+          "article-title": "List processing in real time on a serial computer",
+          "volume": "21",
+          "author": "Baker",
+          "year": "1978",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF4",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,",
+          "DOI": "10.1145/91556.91652"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF5",
+          "doi-asserted-by": "crossref",
+          "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press",
+          "DOI": "10.1145/237721.237771"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF6",
+          "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF7",
+          "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Principal type schemes for functional programs",
+          "author": "Damas",
+          "year": "1982"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8A",
+          "doi-asserted-by": "crossref",
+          "first-page": "312",
+          "DOI": "10.1007/BF01386232",
+          "article-title": "Recursive programming",
+          "volume": "2",
+          "author": "Dijkstra",
+          "year": "1960",
+          "journal-title": "Numer. Math"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF8B",
+          "series-title": "Programming Systems and Languages",
+          "author": "Rosen",
+          "year": "1967"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF9",
+          "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions",
+          "author": "Elsman",
+          "year": "1995"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10",
+          "doi-asserted-by": "crossref",
+          "first-page": "603",
+          "DOI": "10.1145/1780.1803",
+          "article-title": "Transformations and reduction strategies for typed lambda expressions",
+          "volume": "6",
+          "author": "Georgeff",
+          "year": "1984",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF10A",
+          "series-title": "A region profiler for a standard ML compiler based on region inference",
+          "author": "Hallenberg",
+          "year": "1996"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF11",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986",
+          "DOI": "10.1145/319838.319876"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF12",
+          "doi-asserted-by": "crossref",
+          "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.",
+          "DOI": "10.1145/99583.99623"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF13",
+          "doi-asserted-by": "crossref",
+          "first-page": "555",
+          "DOI": "10.1145/48022.48025",
+          "article-title": "Analysis of functional programs to detect run-time garbage cells",
+          "volume": "10",
+          "author": "Katsuro Inoue",
+          "year": "1988",
+          "journal-title": "ACM Trans. Programming Languages Systems"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF14",
+          "series-title": "Fundamental Algorithms",
+          "volume": "Vol. 1",
+          "author": "Knuth",
+          "year": "1972"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF15",
+          "doi-asserted-by": "crossref",
+          "first-page": "419",
+          "DOI": "10.1145/358141.358147",
+          "article-title": "A real-time garbage collector based on the lifetimes of objects",
+          "volume": "26",
+          "author": "Lieberman",
+          "year": "1983",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF16",
+          "doi-asserted-by": "crossref",
+          "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988",
+          "DOI": "10.1145/73560.73564"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF17",
+          "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF18",
+          "doi-asserted-by": "crossref",
+          "first-page": "348",
+          "DOI": "10.1016/0022-0000(78)90014-4",
+          "article-title": "A theory of type polymorphism in programming",
+          "volume": "17",
+          "author": "Milner",
+          "year": "1978",
+          "journal-title": "J. Comput. System Sci."
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF19",
+          "series-title": "The Definition of Standard ML",
+          "author": "Milner",
+          "year": "1990"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF20",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1007/3-540-12925-1_41",
+          "article-title": "Polymorphic type schemes and recursive definitions",
+          "volume": "Vol. 167",
+          "author": "Mycroft",
+          "year": "1984",
+          "journal-title": "Lecture Notes in Computer Science"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21",
+          "first-page": "1",
+          "article-title": "Revised report on the algorithmic language Algol 60",
+          "volume": "1",
+          "author": "Naur",
+          "year": "1963",
+          "journal-title": "Comm. ACM"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF21A",
+          "doi-asserted-by": "crossref",
+          "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/174675.174538"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF22",
+          "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages",
+          "article-title": "Lifetime analysis of dynamically allocated objects",
+          "author": "Ruggieri",
+          "year": "1988"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23A",
+          "series-title": "Theoretical and Practical Aspects of Type and Effect Inference",
+          "author": "Talpin",
+          "year": "1993"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF23B",
+          "unstructured": "Ecole des Mines de Paris"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF24",
+          "doi-asserted-by": "crossref",
+          "DOI": "10.1017/S0956796800000393",
+          "article-title": "Polymorphic type, region and effect inference",
+          "volume": "2",
+          "author": "Talpin",
+          "year": "1992",
+          "journal-title": "J. Funct. Programming"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF25",
+          "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF26",
+          "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages",
+          "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions",
+          "author": "Tofte",
+          "year": "1994"
+        },
+        {
+          "key": "10.1006/inco.1996.2613_IC962613RF27",
+          "doi-asserted-by": "crossref",
+          "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press",
+          "DOI": "10.1145/224164.224168"
+        }
+      ],
+      "container-title": "Information and Computation",
+      "original-title": [],
+      "language": "en",
+      "link": [
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml",
+          "content-type": "text/xml",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        },
+        {
+          "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain",
+          "content-type": "text/plain",
+          "content-version": "vor",
+          "intended-application": "text-mining"
+        }
+      ],
+      "deposited": {
+        "date-parts": [
+          [
+            2019,
+            12,
+            17
+          ]
+        ],
+        "date-time": "2019-12-17T03:20:37Z",
+        "timestamp": 1576552837000
+      },
+      "score": 1,
+      "resource": {
+        "primary": {
+          "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139"
+        }
+      },
+      "subtitle": [],
+      "short-title": [],
+      "issued": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      },
+      "references-count": 31,
+      "journal-issue": {
+        "issue": "2",
+        "published-print": {
+          "date-parts": [
+            [
+              1997,
+              2
+            ]
+          ]
+        }
+      },
+      "alternative-id": [
+        "S0890540196926139"
+      ],
+      "URL": "http://dx.doi.org/10.1006/inco.1996.2613",
+      "relation": {},
+      "ISSN": [
+        "0890-5401"
+      ],
+      "subject": [
+        "Computational Theory and Mathematics",
+        "Computer Science Applications",
+        "Information Systems",
+        "Theoretical Computer Science"
+      ],
+      "container-title-short": "Information and Computation",
+      "published": {
+        "date-parts": [
+          [
+            1997,
+            2
+          ]
+        ]
+      }
+    }
+  },
+  "arxiv_1512.03385": {
+    "path": [
+      "resnet.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nDeep Residual Learning for Image Recognition\nKaiming HeXiangyu ZhangShaoqing RenJian Sun\nMicrosoft Research\n{kahe, v-xiangz, v-shren, jiansun}@microsoft.com\nAbstract\nDeeper neural networks are more difficult to train.  We\npresent a residual learning framework to ease the training\nof networks that are substantially deeper than those used\npreviously.   We explicitly reformulate the layers as learn-\ning residual functions with reference to the layer inputs, in-\nstead of learning unreferenced functions.  We provide com-\nprehensive empirical evidence showing that these residual\nnetworks are easier to optimize, and can gain accuracy from\nconsiderably increased depth. On the ImageNet dataset we\nevaluate residual nets with a depth of up to 152 layers—8×\ndeeper than VGG nets [41] but still having lower complex-\nity. An ensemble of these residual nets achieves 3.57% error\non the ImageNettestset. This result won the 1st place on the\nILSVRC 2015 classification task.  We also present analysis\non CIFAR-10 with 100 and 1000 layers.\nThe  depth  of  representations  is  of  central  importance\nfor  many  visual  recognition  tasks.   Solely  due  to  our  ex-\ntremely deep representations, we obtain a 28% relative im-\nprovement  on  the  COCO  object  detection  dataset.   Deep\nresidual nets are foundations of our submissions to ILSVRC\n& COCO 2015 competitions\n1\n, where we also won the 1st\nplaces on the tasks of ImageNet detection, ImageNet local-\nization, COCO detection, and COCO segmentation.\n1. Introduction\nDeep  convolutional  neural  networks  [22,  21]  have  led\nto  a  series  of  breakthroughs  for  image  classification  [21,\n50, 40].  Deep networks naturally integrate low/mid/high-\nlevel  features  [50]  and  classifiers  in  an  end-to-end  multi-\nlayer fashion, and the “levels” of features can be enriched\nby the number of stacked layers (depth).  
Recent evidence\n[41, 44] reveals that network depth is of crucial importance,\nand the leading results [41, 44, 13, 16] on the challenging\nImageNet dataset [36] all exploit “very deep” [41] models,\nwith a depth of sixteen [41] to thirty [16]. Many other non-\ntrivial visual recognition tasks [8, 12, 7, 32, 27] have also\n1\nhttp://image-net.org/challenges/LSVRC/2015/and\nhttp://mscoco.org/dataset/#detections-challenge2015.\n0123456\n0 \n10\n20\niter. (1e4)\ntraining error (%)\n \n \n0123456\n0\n10\n20\niter. (1e4)\ntest error (%)\n \n \n56-layer\n20-layer\n56-layer\n20-layer\nFigure 1. Training error (left) and test error (right) on CIFAR-10\nwith 20-layer and 56-layer “plain” networks. The deeper network\nhas higher training error, and thus test error.  Similar phenomena\non ImageNet is presented in Fig. 4.\ngreatly benefited from very deep models.\nDriven by the significance of depth, a question arises:Is\nlearning better networks as easy as stacking more layers?\nAn obstacle to answering this question was the notorious\nproblem  of  vanishing/exploding  gradients  [1,  9],  which\nhamper  convergence  from  the  beginning.   This  problem,\nhowever, has been largely addressed by normalized initial-\nization [23, 9, 37, 13] and intermediate normalization layers\n[16], which enable networks with tens of layers to start con-\nverging  for  stochastic  gradient  descent  (SGD)  with  back-\npropagation [22].\nWhen  deeper  networks  are  able  to  start  converging,  a\ndegradationproblem has been exposed:  with the network\ndepth increasing, accuracy gets saturated (which might be\nunsurprising)  and  then  degrades  rapidly.Unexpectedly,\nsuch degradation isnot caused by overfitting,  and adding\nmore layers to a suitably deep model leads tohigher train-\ning error, as reported in [11, 42] and thoroughly verified by\nour experiments. Fig. 
1 shows a typical example.\nThe degradation (of training accuracy) indicates that not\nall systems are similarly easy to optimize. Let us consider a\nshallower architecture and its deeper counterpart that adds\nmore layers onto it. There exists a solutionby construction\nto the deeper model: the added layers areidentitymapping,\nand the other layers are copied from the learned shallower\nmodel.  The existence of this constructed solution indicates\nthat a deeper model should produce no higher training error\nthan its shallower counterpart.  But experiments show that\nour current solvers on hand are unable to find solutions that\n1\narXiv:1512.03385v1  [cs.CV]  10 Dec 2015\n\nidentity\nweight layer\nweight layer\nrelu\nrelu\nF(x)\u0001+\u0001x\nx\nF(x)\nx\nFigure 2. Residual learning: a building block.\nare comparably good or better than the constructed solution\n(or unable to do so in feasible time).\nIn  this  paper,  we  address  the  degradation  problem  by\nintroducing  adeep  residual  learningframework.In-\nstead  of  hoping  each  few  stacked  layers  directly  fit  a\ndesired  underlying  mapping,  we  explicitly  let  these  lay-\ners fit a residual mapping.  Formally, denoting the desired\nunderlying mapping asH(x), we let the stacked nonlinear\nlayers fit another mapping ofF(x) :=H(x)−x. The orig-\ninal mapping is recast intoF(x)+x. We hypothesize that it\nis easier to optimize the residual mapping than to optimize\nthe original, unreferenced mapping.  To the extreme, if an\nidentity mapping were optimal, it would be easier to push\nthe residual to zero than to fit an identity mapping by a stack\nof nonlinear layers.\nThe formulation ofF(x)+xcan be realized by feedfor-\nward neural networks with “shortcut connections” (Fig. 2).\nShortcut connections [2, 34, 49] are those skipping one or\nmore layers.  In our case, the shortcut connections simply\nperformidentitymapping,  and  their  outputs  are  added  to\nthe outputs of the stacked layers (Fig. 2).   
Identity short-\ncut connections add neither extra parameter nor computa-\ntional complexity.  The entire network can still be trained\nend-to-end by SGD with backpropagation, and can be eas-\nily implemented using common libraries (e.g., Caffe [19])\nwithout modifying the solvers.\nWe  present  comprehensive  experiments  on  ImageNet\n[36]  to  show  the  degradation  problem  and  evaluate  our\nmethod. We show that: 1) Our extremely deep residual nets\nare easy to optimize, but the counterpart “plain” nets (that\nsimply stack layers) exhibit higher training error when the\ndepth increases; 2) Our deep residual nets can easily enjoy\naccuracy gains from greatly increased depth, producing re-\nsults substantially better than previous networks.\nSimilar phenomena are also shown on the CIFAR-10 set\n[20],  suggesting  that  the  optimization  difficulties  and  the\neffects of our method are not just akin to a particular dataset.\nWe present successfully trained models on this dataset with\nover 100 layers, and explore models with over 1000 layers.\nOn the ImageNet classification dataset [36],  we obtain\nexcellent results by extremely deep residual nets. Our 152-\nlayer residual net is the deepest network ever presented on\nImageNet,  while still having lower complexity than VGG\nnets  [41].    Our  ensemble  has3.57%top-5  error  on  the\nImageNettestset,  andwon  the  1st  place  in  the  ILSVRC\n2015 classification competition.   The extremely deep rep-\nresentations also have excellent generalization performance\non other recognition tasks,  and lead us to furtherwin the\n1st places on:  ImageNet detection, ImageNet localization,\nCOCO detection, and COCO segmentationin ILSVRC &\nCOCO 2015 competitions. This strong evidence shows that\nthe residual learning principle is generic, and we expect that\nit is applicable in other vision and non-vision problems.\n2. 
Related Work\nResidual Representations.In image recognition, VLAD\n[18] is a representation that encodes by the residual vectors\nwith respect to a dictionary, and Fisher Vector [30] can be\nformulated as a probabilistic version [18] of VLAD. Both\nof them are powerful shallow representations for image re-\ntrieval and classification [4, 48].   For vector quantization,\nencoding residual vectors [17] is shown to be more effec-\ntive than encoding original vectors.\nIn  low-level  vision  and  computer  graphics,  for  solv-\ning Partial Differential Equations (PDEs), the widely used\nMultigrid method [3] reformulates the system as subprob-\nlems at multiple scales, where each subproblem is respon-\nsible for the residual solution between a coarser and a finer\nscale.  An alternative to Multigrid is hierarchical basis pre-\nconditioning [45, 46], which relies on variables that repre-\nsent residual vectors between two scales. It has been shown\n[3, 45, 46] that these solvers converge much faster than stan-\ndard solvers that are unaware of the residual nature of the\nsolutions. These methods suggest that a good reformulation\nor preconditioning can simplify the optimization.\nShortcut Connections.Practices and theories that lead to\nshortcut connections [2, 34, 49] have been studied for a long\ntime.  An early practice of training multi-layer perceptrons\n(MLPs) is to add a linear layer connected from the network\ninput  to  the  output  [34,  49].   In  [44,  24],  a  few  interme-\ndiate  layers  are  directly  connected  to  auxiliary  classifiers\nfor addressing vanishing/exploding gradients.   The papers\nof [39, 38, 31, 47] propose methods for centering layer re-\nsponses, gradients, and propagated errors, implemented by\nshortcut connections.  
In [44], an “inception” layer is com-\nposed of a shortcut branch and a few deeper branches.\nConcurrent with our work, “highway networks” [42, 43]\npresent  shortcut  connections  with  gating  functions  [15].\nThese  gates  are  data-dependent  and  have  parameters,  in\ncontrast  to  our  identity  shortcuts  that  are  parameter-free.\nWhen a gated shortcut is “closed” (approaching zero), the\nlayers  in  highway  networks  representnon-residualfunc-\ntions.On  the  contrary,  our  formulation  always  learns\nresidual functions; our identity shortcuts are never closed,\nand  all  information  is  always  passed  through,  with  addi-\ntional residual functions to be learned.   In addition,  high-\n2\n\nway networks have not demonstrated accuracy gains with\nextremely increased depth (e.g., over 100 layers).\n3. Deep Residual Learning\n3.1. Residual Learning\nLet us considerH(x)as an underlying mapping to be\nfit by a few stacked layers (not necessarily the entire net),\nwithxdenoting the inputs to the first of these layers. If one\nhypothesizes that multiple nonlinear layers can asymptoti-\ncally approximate complicated functions\n2\n, then it is equiv-\nalent to hypothesize that they can asymptotically approxi-\nmate the residual functions,i.e.,H(x)−x(assuming that\nthe  input  and  output  are  of  the  same  dimensions).So\nrather than expect stacked layers to approximateH(x), we\nexplicitly let these layers approximate a residual function\nF(x) :=H(x)−x.  The original function thus becomes\nF(x)+x. Although both forms should be able to asymptot-\nically approximate the desired functions (as hypothesized),\nthe ease of learning might be different.\nThis reformulation is motivated by the counterintuitive\nphenomena about the degradation problem (Fig. 1, left). 
As\nwe  discussed  in  the  introduction,  if  the  added  layers  can\nbe constructed as identity mappings, a deeper model should\nhave  training  error  no  greater  than  its  shallower  counter-\npart.   The  degradation  problem  suggests  that  the  solvers\nmight have difficulties in approximating identity mappings\nby multiple nonlinear layers. With the residual learning re-\nformulation,  if identity mappings are optimal,  the solvers\nmay simply drive the weights of the multiple nonlinear lay-\ners toward zero to approach identity mappings.\nIn real cases, it is unlikely that identity mappings are op-\ntimal, but our reformulation may help to precondition the\nproblem.   If  the  optimal  function  is  closer  to  an  identity\nmapping than to a zero mapping, it should be easier for the\nsolver to find the perturbations with reference to an identity\nmapping, than to learn the function as a new one. We show\nby experiments (Fig. 7) that the learned residual functions in\ngeneral have small responses, suggesting that identity map-\npings provide reasonable preconditioning.\n3.2. Identity Mapping by Shortcuts\nWe adopt residual learning to every few stacked layers.\nA building block is shown in Fig. 2. Formally, in this paper\nwe consider a building block defined as:\ny=F(x,{W\ni\n}) +x.(1)\nHerexandyare the input and output vectors of the lay-\ners  considered.   The  functionF(x,{W\ni\n})represents  the\nresidual mapping to be learned.  For the example in Fig. 2\nthat has two layers,F=W\n2\nσ(W\n1\nx)in whichσdenotes\n2\nThis hypothesis, however, is still an open question. See [28].\nReLU [29] and the biases are omitted for simplifying no-\ntations.   The operationF+xis performed by a shortcut\nconnection and element-wise addition.  We adopt the sec-\nond nonlinearity after the addition (i.e.,σ(y), see Fig. 2).\nThe shortcut connections in Eqn.(1) introduce neither ex-\ntra parameter nor computation complexity. 
This is not only\nattractive in practice but also important in our comparisons\nbetween plain and residual networks.  We can fairly com-\npare  plain/residual  networks  that  simultaneously  have  the\nsame  number  of  parameters,  depth,  width,  and  computa-\ntional cost (except for the negligible element-wise addition).\nThe dimensions ofxandFmust be equal in Eqn.(1).\nIf this is not the case (e.g., when changing the input/output\nchannels),  we  can  perform  a  linear  projectionW\ns\nby  the\nshortcut connections to match the dimensions:\ny=F(x,{W\ni\n}) +W\ns\nx.(2)\nWe can also use a square matrixW\ns\nin Eqn.(1). But we will\nshow by experiments that the identity mapping is sufficient\nfor addressing the degradation problem and is economical,\nand thusW\ns\nis only used when matching dimensions.\nThe form of the residual functionFis flexible.  Exper-\niments in this paper involve a functionFthat has two or\nthree layers (Fig. 5), while more layers are possible.  But if\nFhas only a single layer, Eqn.(1) is similar to a linear layer:\ny=W\n1\nx+x, for which we have not observed advantages.\nWe also note that although the above notations are about\nfully-connected layers for simplicity, they are applicable to\nconvolutional layers.  The functionF(x,{W\ni\n})can repre-\nsent multiple convolutional layers.  The element-wise addi-\ntion is performed on two feature maps, channel by channel.\n3.3. Network Architectures\nWe have tested various plain/residual nets, and have ob-\nserved consistent phenomena. To provide instances for dis-\ncussion, we describe two models for ImageNet as follows.\nPlain Network.Our plain baselines (Fig. 3,  middle) are\nmainly inspired by the philosophy of VGG nets [41] (Fig. 3,\nleft).  
The convolutional layers mostly have 3×3 filters and\nfollow  two  simple  design  rules:   (i)  for  the  same  output\nfeature map size,  the layers have the same number of fil-\nters;  and  (ii)  if  the  feature  map  size  is  halved,  the  num-\nber  of  filters  is  doubled  so  as  to  preserve  the  time  com-\nplexity  per  layer.   We  perform  downsampling  directly  by\nconvolutional layers that have a stride of 2.  The network\nends with a global average pooling layer and a 1000-way\nfully-connected  layer  with  softmax.   The  total  number  of\nweighted layers is 34 in Fig. 3 (middle).\nIt is worth noticing that our model hasfewerfilters and\nlowercomplexity than VGG nets [41] (Fig. 3, left). Our 34-\nlayer baseline has 3.6 billion FLOPs (multiply-adds), which\nis only 18% of VGG-19 (19.6 billion FLOPs).\n3\n\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n3x3 conv, 512\n3x3 conv, 64\n3x3 conv, 64\npool, /2\n3x3 conv, 128\n3x3 conv, 128\npool, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\npool, /2\nfc 4096\nfc 4096\nfc 1000\nimage\noutput \nsize: 112\noutput \nsize: 224\noutput \nsize: 56\noutput \nsize: 28\noutput \nsize: 14\noutput \nsize: 7\noutput \nsize: 1\nVGG-1934-layer plain\n7x7 conv, 64, /2\npool, /2\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 64\n3x3 conv, 128, /2\n3x3 conv, 
128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 128\n3x3 conv, 256, /2\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 256\n3x3 conv, 512, /2\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\n3x3 conv, 512\navg pool\nfc 1000\nimage\n34-layer residual\nFigure 3. Example network architectures for ImageNet.Left: the\nVGG-19 model [41] (19.6 billion FLOPs) as a reference.Mid-\ndle: a plain network with 34 parameter layers (3.6 billion FLOPs).\nRight:  a residual network with 34 parameter layers (3.6 billion\nFLOPs). The dotted shortcuts increase dimensions.Table 1shows\nmore details and other variants.\nResidual Network.Based on the above plain network, we\ninsert  shortcut  connections  (Fig.  3,  right)  which  turn  the\nnetwork into its counterpart residual version.  The identity\nshortcuts (Eqn.(1)) can be directly used when the input and\noutput are of the same dimensions (solid line shortcuts in\nFig. 3). When the dimensions increase (dotted line shortcuts\nin Fig. 3), we consider two options:  (A) The shortcut still\nperforms identity mapping, with extra zero entries padded\nfor increasing dimensions.  This option introduces no extra\nparameter; (B) The projection shortcut in Eqn.(2) is used to\nmatch dimensions (done by 1×1 convolutions).   For both\noptions, when the shortcuts go across feature maps of two\nsizes, they are performed with a stride of 2.\n3.4. Implementation\nOur implementation for ImageNet follows the practice\nin [21, 41].  The image is resized with its shorter side ran-\ndomly sampled in[256,480]for scale augmentation [41].\nA 224×224 crop is randomly sampled from an image or its\nhorizontal flip, with the per-pixel mean subtracted [21]. The\nstandard color augmentation in [21] is used. 
We adopt batch\nnormalization  (BN)  [16]  right  after  each  convolution  and\nbefore activation, following [16].  We initialize the weights\nas in [13] and train all plain/residual nets from scratch. We\nuse SGD with a mini-batch size of 256.  The learning rate\nstarts from 0.1 and is divided by 10 when the error plateaus,\nand the models are trained for up to60×10\n4\niterations. We\nuse a weight decay of 0.0001 and a momentum of 0.9.  We\ndo not use dropout [14], following the practice in [16].\nIn testing, for comparison studies we adopt the standard\n10-crop testing [21].  For best results, we adopt the fully-\nconvolutional form as in [41, 13],  and average the scores\nat multiple scales (images are resized such that the shorter\nside is in{224,256,384,480,640}).\n4. Experiments\n4.1. ImageNet Classification\nWe evaluate our method on the ImageNet 2012 classifi-\ncation dataset [36] that consists of 1000 classes. The models\nare trained on the 1.28 million training images, and evalu-\nated on the 50k validation images.  We also obtain a final\nresult on the 100k test images, reported by the test server.\nWe evaluate both top-1 and top-5 error rates.\nPlain Networks.We first evaluate 18-layer and 34-layer\nplain nets. The 34-layer plain net is in Fig. 3 (middle). The\n18-layer plain net is of a similar form.  See Table 1 for de-\ntailed architectures.\nThe results in Table 2 show that the deeper 34-layer plain\nnet has higher validation error than the shallower 18-layer\nplain net.   To reveal the reasons,  in Fig. 4 (left) we com-\npare their training/validation errors during the training pro-\ncedure.   
We have observed the degradation problem - the\n4\n\nlayer nameoutput size18-layer34-layer50-layer101-layer152-layer\nconv1112×1127×7, 64, stride 2\nconv2x56×56\n3×3 max pool, stride 2\n[\n3×3, 64\n3×3, 64\n]\n×2\n[\n3×3, 64\n3×3, 64\n]\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\n\n\n1×1, 64\n3×3, 64\n1×1, 256\n\n\n×3\nconv3x28×28\n[\n3×3, 128\n3×3, 128\n]\n×2\n[\n3×3, 128\n3×3, 128\n]\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×4\n\n\n1×1, 128\n3×3, 128\n1×1, 512\n\n\n×8\nconv4x14×14\n[\n3×3, 256\n3×3, 256\n]\n×2\n[\n3×3, 256\n3×3, 256\n]\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×6\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×23\n\n\n1×1, 256\n3×3, 256\n1×1, 1024\n\n\n×36\nconv5x7×7\n[\n3×3, 512\n3×3, 512\n]\n×2\n[\n3×3, 512\n3×3, 512\n]\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n\n\n1×1, 512\n3×3, 512\n1×1, 2048\n\n\n×3\n1×1average pool, 1000-d fc, softmax\nFLOPs1.8×10\n9\n3.6×10\n9\n3.8×10\n9\n7.6×10\n9\n11.3×10\n9\nTable 1. Architectures for ImageNet. Building blocks are shown in brackets (see also Fig. 5), with the numbers of blocks stacked. Down-\nsampling is performed by conv31, conv41, and conv51 with a stride of 2.\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nplain-18\nplain-34\n01020304050\n20\n30\n40\n50\n60\niter. (1e4)\nerror (%)\n \n \nResNet-18\nResNet-34\n18-layer\n34-layer\n18-layer\n34-layer\nFigure 4. Training onImageNet. Thin curves denote training error, and bold curves denote validation error of the center crops. Left: plain\nnetworks of 18 and 34 layers. Right: ResNets of 18 and 34 layers. In this plot, the residual networks have no extra parameter compared to\ntheir plain counterparts.\nplainResNet\n18 layers27.9427.88\n34 layers\n28.5425.03\nTable 2. 
Top-1 error (%, 10-crop testing) on ImageNet validation.\nHere the ResNets have no extra parameter compared to their plain\ncounterparts. Fig. 4 shows the training procedures.\n34-layer plain net has highertrainingerror throughout the\nwhole training procedure,  even though the solution space\nof the 18-layer plain network is a subspace of that of the\n34-layer one.\nWe argue that this optimization difficulty isunlikelyto\nbe caused by vanishing gradients. These plain networks are\ntrained  with  BN  [16],  which  ensures  forward  propagated\nsignals to have non-zero variances.  We also verify that the\nbackward propagated gradients exhibit healthy norms with\nBN.  So  neither  forward  nor  backward  signals  vanish.   In\nfact, the 34-layer plain net is still able to achieve compet-\nitive accuracy (Table 3),  suggesting that the solver works\nto some extent. We conjecture that the deep plain nets may\nhave exponentially low convergence rates, which impact the\nreducing of the training error\n3\n.  The reason for such opti-\nmization difficulties will be studied in the future.\nResidual  Networks.Next  we  evaluate  18-layer  and  34-\nlayer  residual  nets  (ResNets).   The  baseline  architectures\nare the same as the above plain nets, expect that a shortcut\nconnection is added to each pair of 3×3 filters as in Fig. 3\n(right).  In the first comparison (Table 2 and Fig. 4 right),\nwe use identity mapping for all shortcuts and zero-padding\nfor increasing dimensions (option A). So they haveno extra\nparametercompared to the plain counterparts.\nWe  have  three  major  observations  from  Table  2  and\nFig. 4.  First, the situation is reversed with residual learn-\ning – the 34-layer ResNet is better than the 18-layer ResNet\n(by 2.8%).  More importantly, the 34-layer ResNet exhibits\nconsiderably lower training error and is generalizable to the\nvalidation data. 
This indicates that the degradation problem\nis well addressed in this setting and we manage to obtain\naccuracy gains from increased depth.\nSecond, compared to its plain counterpart, the 34-layer\n3\nWe have experimented with more training iterations (3×) and still ob-\nserved the degradation problem,  suggesting that this problem cannot be\nfeasibly addressed by simply using more iterations.\n5\n\nmodeltop-1 err.top-5 err.\nVGG-16 [41]28.079.33\nGoogLeNet [44]\n-9.15\nPReLU-net [13]24.277.38\nplain-3428.5410.02\nResNet-34 A25.037.76\nResNet-34 B\n24.527.46\nResNet-34 C24.197.40\nResNet-5022.856.71\nResNet-101\n21.756.05\nResNet-15221.435.71\nTable 3. Error rates (%,10-croptesting) on ImageNet validation.\nVGG-16 is based on our test.  ResNet-50/101/152 are of option B\nthat only uses projections for increasing dimensions.\nmethodtop-1 err.top-5 err.\nVGG [41] (ILSVRC’14)-8.43\n†\nGoogLeNet [44] (ILSVRC’14)\n-7.89\nVGG [41](v5)24.47.1\nPReLU-net [13]21.595.71\nBN-inception [16]\n21.995.81\nResNet-34 B21.845.71\nResNet-34 C21.535.60\nResNet-5020.745.25\nResNet-10119.874.60\nResNet-15219.384.49\nTable 4. Error rates (%) ofsingle-modelresults on the ImageNet\nvalidation set (except\n†\nreported on the test set).\nmethodtop-5 err. (test)\nVGG [41] (ILSVRC’14)7.32\nGoogLeNet [44] (ILSVRC’14)6.66\nVGG [41](v5)6.8\nPReLU-net [13]4.94\nBN-inception [16]4.82\nResNet (ILSVRC’15)3.57\nTable 5. Error rates (%) ofensembles.  The top-5 error is on the\ntest set of ImageNet and reported by the test server.\nResNet reduces the top-1 error by 3.5% (Table 2), resulting\nfrom the successfully reduced training error (Fig. 4 rightvs.\nleft).  This comparison verifies the effectiveness of residual\nlearning on extremely deep systems.\nLast,  we also note that the 18-layer plain/residual nets\nare comparably accurate (Table 2), but the 18-layer ResNet\nconverges faster (Fig. 4 rightvs. left). 
When the net is “not\noverly deep” (18 layers here), the current SGD solver is still\nable to find good solutions to the plain net. In this case, the\nResNet eases the optimization by providing faster conver-\ngence at the early stage.\nIdentityvs.  Projection  Shortcuts.We  have  shown  that\n3x3, 64\n1x1, 64\nrelu\n1x1, 256\nrelu\nrelu\n3x3, 64\n3x3, 64\nrelu\nrelu\n64-d256-d\nFigure  5.  A  deeper  residual  functionFfor  ImageNet.   Left:  a\nbuilding block (on 56×56 feature maps) as in Fig. 3 for ResNet-\n34. Right: a “bottleneck” building block for ResNet-50/101/152.\nparameter-free, identity shortcuts help with training.  Next\nwe investigate projection shortcuts (Eqn.(2)). In Table 3 we\ncompare three options: (A) zero-padding shortcuts are used\nfor increasing dimensions, and all shortcuts are parameter-\nfree  (the  same  as  Table  2  and  Fig.  4  right);  (B)  projec-\ntion shortcuts are used for increasing dimensions, and other\nshortcuts are identity; and (C) all shortcuts are projections.\nTable 3 shows that all three options are considerably bet-\nter than the plain counterpart. B is slightly better than A. We\nargue that this is because the zero-padded dimensions in A\nindeed have no residual learning. C is marginally better than\nB, and we attribute this to the extra parameters introduced\nby many (thirteen) projection shortcuts.  But the small dif-\nferences among A/B/C indicate that projection shortcuts are\nnot essential for addressing the degradation problem. So we\ndo not use option C in the rest of this paper, to reduce mem-\nory/time complexity and model sizes. Identity shortcuts are\nparticularly important for not increasing the complexity of\nthe bottleneck architectures that are introduced below.\nDeeper Bottleneck Architectures.Next we describe our\ndeeper nets for ImageNet. Because of concerns on the train-\ning time that we can afford, we modify the building block\nas abottleneckdesign\n4\n.  
For each residual functionF, we\nuse a stack of 3 layers instead of 2 (Fig. 5). The three layers\nare 1×1, 3×3, and 1×1 convolutions, where the 1×1 layers\nare responsible for reducing and then increasing (restoring)\ndimensions, leaving the 3×3 layer a bottleneck with smaller\ninput/output dimensions.  Fig. 5 shows an example, where\nboth designs have similar time complexity.\nThe parameter-free identity shortcuts are particularly im-\nportant for the bottleneck architectures. If the identity short-\ncut  in  Fig.  5  (right)  is  replaced  with  projection,  one  can\nshow that the time complexity and model size are doubled,\nas  the  shortcut  is  connected  to  the  two  high-dimensional\nends.   So identity shortcuts lead to more efficient models\nfor the bottleneck designs.\n50-layer ResNet:We replace each 2-layer block in the\n4\nDeepernon-bottleneck ResNets (e.g., Fig. 5 left) also gain accuracy\nfrom increased depth (as shown on CIFAR-10), but are not as economical\nas the bottleneck ResNets. So the usage of bottleneck designs is mainly due\nto practical considerations.  We further note that the degradation problem\nof plain nets is also witnessed for the bottleneck designs.\n6\n\n34-layer net with this 3-layer bottleneck block, resulting in\na 50-layer ResNet (Table 1). We use option B for increasing\ndimensions. This model has 3.8 billion FLOPs.\n101-layer and 152-layer ResNets:We construct 101-\nlayer and 152-layer ResNets by using more 3-layer blocks\n(Table 1).  Remarkably, although the depth is significantly\nincreased, the 152-layer ResNet (11.3 billion FLOPs) still\nhaslower complexitythan VGG-16/19 nets (15.3/19.6 bil-\nlion FLOPs).\nThe  50/101/152-layer  ResNets  are  more  accurate  than\nthe 34-layer ones by considerable margins (Table 3 and 4).\nWe  do  not  observe  the  degradation  problem  and  thus  en-\njoy significant accuracy gains from considerably increased\ndepth. 
The benefits of depth are witnessed for all evaluation\nmetrics (Table 3 and 4).\nComparisons with State-of-the-art Methods.In Table 4\nwe  compare  with  the  previous  best  single-model  results.\nOur baseline 34-layer ResNets have achieved very compet-\nitive accuracy.   Our 152-layer ResNet has a single-model\ntop-5 validation error of 4.49%.   This single-model result\noutperforms  all  previous  ensemble  results  (Table  5).   We\ncombine six models of different depth to form an ensemble\n(only with two 152-layer ones at the time of submitting).\nThis leads to3.57%top-5 error on the test set (Table 5).\nThis entry won the 1st place in ILSVRC 2015.\n4.2. CIFAR-10 and Analysis\nWe  conducted  more  studies  on  the  CIFAR-10  dataset\n[20],  which consists of 50k training images and 10k test-\ning images in 10 classes.  We present experiments trained\non the training set and evaluated on the test set.  Our focus\nis on the behaviors of extremely deep networks, but not on\npushing the state-of-the-art results, so we intentionally use\nsimple architectures as follows.\nThe plain/residual architectures follow the form in Fig. 3\n(middle/right). The network inputs are 32×32 images, with\nthe per-pixel mean subtracted. The first layer is 3×3 convo-\nlutions.  Then we use a stack of6nlayers with 3×3 convo-\nlutions on the feature maps of sizes{32,16,8}respectively,\nwith 2nlayers for each feature map size.  The numbers of\nfilters are{16,32,64}respectively. The subsampling is per-\nformed by convolutions with a stride of 2. The network ends\nwith  a  global  average  pooling,  a  10-way  fully-connected\nlayer, and softmax. There are totally 6n+2 stacked weighted\nlayers. The following table summarizes the architecture:\noutput map size32×3216×168×8\n# layers1+2n2n2n\n# filters\n163264\nWhen  shortcut  connections  are  used,  they  are  connected\nto the pairs of 3×3 layers (totally3nshortcuts).   
On this\ndataset we use identity shortcuts in all cases (i.e., option A),\nmethoderror (%)\nMaxout [10]9.38\nNIN [25]8.81\nDSN [24]8.22\n# layers# params\nFitNet [35]192.5M8.39\nHighway [42, 43]192.3M7.54(7.72±0.16)\nHighway [42, 43]\n321.25M8.80\nResNet200.27M8.75\nResNet\n320.46M7.51\nResNet440.66M7.17\nResNet560.85M6.97\nResNet\n1101.7M6.43(6.61±0.16)\nResNet120219.4M7.93\nTable 6. Classification error on theCIFAR-10test set.  All meth-\nods are with data augmentation. For ResNet-110, we run it 5 times\nand show “best (mean±std)” as in [43].\nso our residual models have exactly the same depth, width,\nand number of parameters as the plain counterparts.\nWe use a weight decay of 0.0001 and momentum of 0.9,\nand adopt the weight initialization in [13] and BN [16] but\nwith  no  dropout.   These  models  are  trained  with  a  mini-\nbatch size of 128 on two GPUs.  We start with a learning\nrate of 0.1,  divide it by 10 at 32k and 48k iterations,  and\nterminate training at 64k iterations, which is determined on\na 45k/5k train/val split. We follow the simple data augmen-\ntation in [24] for training: 4 pixels are padded on each side,\nand  a  32×32  crop  is  randomly  sampled  from  the  padded\nimage or its horizontal flip.  For testing, we only evaluate\nthe single view of the original 32×32 image.\nWe comparen={3,5,7,9}, leading to 20, 32, 44, and\n56-layer networks.  Fig. 6 (left) shows the behaviors of the\nplain nets. The deep plain nets suffer from increased depth,\nand exhibit higher training error when going deeper.  This\nphenomenon is similar to that on ImageNet (Fig. 4, left) and\non MNIST (see [42]), suggesting that such an optimization\ndifficulty is a fundamental problem.\nFig. 6 (middle) shows the behaviors of ResNets.   Also\nsimilar to the ImageNet cases (Fig. 
4, right), our ResNets\nmanage to overcome the optimization difficulty and demon-\nstrate accuracy gains when the depth increases.\nWe  further  exploren= 18that  leads  to  a  110-layer\nResNet.  In this case, we find that the initial learning rate\nof 0.1 is slightly too large to start converging\n5\n.  So we use\n0.01 to warm up the training until the training error is below\n80% (about 400 iterations), and then go back to 0.1 and con-\ntinue training.  The rest of the learning schedule is as done\npreviously.  This 110-layer network converges well (Fig. 6,\nmiddle).  It hasfewerparameters than other deep and thin\n5\nWith an initial learning rate of 0.1, it starts converging (<90% error)\nafter several epochs, but still reaches similar accuracy.\n7\n\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nplain-20\nplain-32\nplain-44\nplain-56\n0123456\n0\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nResNet-20\nResNet-32\nResNet-44\nResNet-56\nResNet-110\n56-layer\n20-layer\n110-layer\n20-layer\n456\n0\n1\n5\n10\n20\niter. (1e4)\nerror (%)\n \n \nresidual-110\nresidual-1202\nFigure 6. Training onCIFAR-10.  Dashed lines denote training error, and bold lines denote testing error.Left: plain networks.  The error\nof plain-110 is higher than 60% and not displayed.Middle: ResNets.Right: ResNets with 110 and 1202 layers.\n020406080100\n1\n2\n3\nlayer index (sorted by magnitude)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\n020406080100\n1\n2\n3\nlayer index (original)\nstd\n \n \nplain-20\nplain-56\nResNet-20\nResNet-56\nResNet-110\nFigure 7. Standard deviations (std) of layer responses on CIFAR-\n10. The responses are the outputs of each 3×3 layer, after BN and\nbefore nonlinearity.Top:  the layers are shown in their original\norder.Bottom: the responses are ranked in descending order.\nnetworks such as FitNet [35] and Highway [42] (Table 6),\nyet is among the state-of-the-art results (6.43%, Table 6).\nAnalysis of Layer Responses.Fig. 
7 shows the standard\ndeviations (std) of the layer responses.  The responses are\nthe outputs of each 3×3 layer,  after BN and before other\nnonlinearity  (ReLU/addition).For  ResNets,  this  analy-\nsis reveals the response strength of the residual functions.\nFig. 7 shows that ResNets have generally smaller responses\nthan their plain counterparts.  These results support our ba-\nsic  motivation  (Sec.3.1)  that  the  residual  functions  might\nbe generally closer to zero than the non-residual functions.\nWe also notice that the deeper ResNet has smaller magni-\ntudes of responses, as evidenced by the comparisons among\nResNet-20,  56,  and  110  in  Fig.  7.   When  there  are  more\nlayers, an individual layer of ResNets tends to modify the\nsignal less.\nExploring Over 1000 layers.We explore an aggressively\ndeep  model  of  over  1000  layers.   We  setn= 200that\nleads to a 1202-layer network, which is trained as described\nabove.   Our method showsno optimization difficulty,  and\nthis10\n3\n-layer  network  is  able  to  achievetraining  error\n<0.1%  (Fig.  6,  right).    Its  test  error  is  still  fairly  good\n(7.93%, Table 6).\nBut there are still open problems on such aggressively\ndeep models.  The testing result of this 1202-layer network\nis worse than that of our 110-layer network, although both\ntraining data07+1207++12\ntest dataVOC 07 testVOC 12 test\nVGG-1673.270.4\nResNet-101\n76.473.8\nTable  7.  Object  detection  mAP  (%)  on  the  PASCAL  VOC\n2007/2012  test  sets  usingbaselineFaster  R-CNN.  See  also  Ta-\nble 10 and 11 for better results.\nmetricmAP@.5mAP@[.5, .95]\nVGG-1641.521.2\nResNet-10148.427.2\nTable 8. Object detection mAP (%) on the COCO validation set\nusingbaselineFaster R-CNN. See also Table 9 for better results.\nhave similar training error. We argue that this is because of\noverfitting.  The 1202-layer network may be unnecessarily\nlarge (19.4M) for this small dataset.  
Strong regularization\nsuch as maxout [10] or dropout [14] is applied to obtain the\nbest results ([10, 25, 24, 35]) on this dataset.  In this paper,\nwe use no maxout/dropout and just simply impose regular-\nization via deep and thin architectures by design,  without\ndistracting  from  the  focus  on  the  difficulties  of  optimiza-\ntion.  But combining with stronger regularization may im-\nprove results, which we will study in the future.\n4.3. Object Detection on PASCAL and MS COCO\nOur  method  has  good  generalization  performance  on\nother recognition tasks.  Table 7 and  8 show the object de-\ntection baseline results on PASCAL VOC 2007 and 2012\n[5] and COCO [26]. We adoptFaster R-CNN[32] as the de-\ntection method. Here we are interested in the improvements\nof replacing VGG-16 [41] with ResNet-101. The detection\nimplementation (see appendix) of using both models is the\nsame, so the gains can only be attributed to better networks.\nMost remarkably, on the challenging COCO dataset we ob-\ntain a 6.0% increase in COCO’s standard metric (mAP@[.5,\n.95]),  which is a 28% relative improvement.  This gain is\nsolely due to the learned representations.\nBased on deep residual nets,  we won the 1st places in\nseveral tracks in ILSVRC & COCO 2015 competitions: Im-\nageNet detection, ImageNet localization, COCO detection,\nand COCO segmentation. The details are in the appendix.\n8\n\nReferences\n[1]  Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependen-\ncies with gradient descent is difficult.IEEE Transactions on Neural\nNetworks, 5(2):157–166, 1994.\n[2]  C.  M.  Bishop.Neural  networks  for  pattern  recognition.   Oxford\nuniversity press, 1995.\n[3]  W. L. Briggs, S. F. McCormick, et al.A Multigrid Tutorial.  Siam,\n2000.\n[4]  K. Chatfield, V. Lempitsky, A. Vedaldi, and A. Zisserman. The devil\nis in the details:  an evaluation of recent feature encoding methods.\nInBMVC, 2011.\n[5]  M. Everingham, L. Van Gool, C. K. Williams, J. 
Winn, and A. Zis-\nserman.  The Pascal Visual Object Classes (VOC) Challenge.IJCV,\npages 303–338, 2010.\n[6]  S. Gidaris and N. Komodakis. Object detection via a multi-region &\nsemantic segmentation-aware cnn model. InICCV, 2015.\n[7]  R. Girshick. Fast R-CNN. InICCV, 2015.\n[8]  R. Girshick, J. Donahue, T. Darrell, and J. Malik.  Rich feature hier-\narchies for accurate object detection and semantic segmentation.  In\nCVPR, 2014.\n[9]  X. Glorot and Y. Bengio.   Understanding the difficulty of training\ndeep feedforward neural networks. InAISTATS, 2010.\n[10]  I.  J.  Goodfellow,  D.  Warde-Farley,  M.  Mirza,  A.  Courville,  and\nY. Bengio. Maxout networks.arXiv:1302.4389, 2013.\n[11]  K. He and J. Sun. Convolutional neural networks at constrained time\ncost. InCVPR, 2015.\n[12]  K. He, X. Zhang, S. Ren, and J. Sun. Spatial pyramid pooling in deep\nconvolutional networks for visual recognition. InECCV, 2014.\n[13]  K. He, X. Zhang, S. Ren, and J. Sun.  Delving deep into rectifiers:\nSurpassing human-level performance on imagenet classification.  In\nICCV, 2015.\n[14]  G.  E.  Hinton,   N.  Srivastava,   A.  Krizhevsky,   I.  Sutskever,   and\nR. R. Salakhutdinov.  Improving neural networks by preventing co-\nadaptation of feature detectors.arXiv:1207.0580, 2012.\n[15]  S. Hochreiter and J. Schmidhuber. Long short-term memory.Neural\ncomputation, 9(8):1735–1780, 1997.\n[16]  S. Ioffe and C. Szegedy.   Batch normalization:  Accelerating deep\nnetwork training by reducing internal covariate shift. InICML, 2015.\n[17]  H. Jegou, M. Douze, and C. Schmid. Product quantization for nearest\nneighbor search.TPAMI, 33, 2011.\n[18]  H.  Jegou,   F.  Perronnin,   M.  Douze,   J.  Sanchez,   P.  Perez,   and\nC. Schmid. Aggregating local image descriptors into compact codes.\nTPAMI, 2012.\n[19]  Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick,\nS. Guadarrama, and T. Darrell. 
Caffe: Convolutional architecture for\nfast feature embedding.arXiv:1408.5093, 2014.\n[20]  A. Krizhevsky.   Learning multiple layers of features from tiny im-\nages.Tech Report, 2009.\n[21]  A. Krizhevsky, I. Sutskever, and G. Hinton.  Imagenet classification\nwith deep convolutional neural networks. InNIPS, 2012.\n[22]  Y.  LeCun,  B.  Boser,  J.  S.  Denker,  D.  Henderson,  R.  E.  Howard,\nW. Hubbard,  and L. D. Jackel.   Backpropagation applied to hand-\nwritten zip code recognition.Neural computation, 1989.\n[23]  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. M\n ̈\nuller. Efficient backprop.\nInNeural Networks: Tricks of the Trade, pages 9–50. Springer, 1998.\n[24]  C.-Y.  Lee,  S.  Xie,  P.  Gallagher,  Z.  Zhang,  and  Z.  Tu.    Deeply-\nsupervised nets.arXiv:1409.5185, 2014.\n[25]  M. Lin, Q. Chen, and S. Yan. Network in network.arXiv:1312.4400,\n2013.\n[26]  T.-Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan,\nP. Doll\n ́\nar, and C. L. Zitnick.  Microsoft COCO: Common objects in\ncontext. InECCV. 2014.\n[27]  J. Long, E. Shelhamer, and T. Darrell.  Fully convolutional networks\nfor semantic segmentation. InCVPR, 2015.\n[28]  G. Mont\n ́\nufar, R. Pascanu, K. Cho, and Y. Bengio.  On the number of\nlinear regions of deep neural networks. InNIPS, 2014.\n[29]  V. Nair and G. E. Hinton.  Rectified linear units improve restricted\nboltzmann machines. InICML, 2010.\n[30]  F. Perronnin and C. Dance. Fisher kernels on visual vocabularies for\nimage categorization. InCVPR, 2007.\n[31]  T. Raiko, H. Valpola, and Y. LeCun.  Deep learning made easier by\nlinear transformations in perceptrons. InAISTATS, 2012.\n[32]  S. Ren,  K. He,  R.  Girshick,  and J.  Sun.   Faster  R-CNN: Towards\nreal-time object detection with region proposal networks.  InNIPS,\n2015.\n[33]  S. Ren, K. He, R. Girshick, X. Zhang, and J. Sun.  Object detection\nnetworks on convolutional feature maps.arXiv:1504.06066, 2015.\n[34]  B. D. Ripley.Pattern recognition and neural networks. 
 Cambridge\nuniversity press, 1996.\n[35]  A.  Romero,  N.  Ballas,  S.  E.  Kahou,  A.  Chassang,  C.  Gatta,  and\nY. Bengio. Fitnets: Hints for thin deep nets. InICLR, 2015.\n[36]  O.  Russakovsky,  J.  Deng,  H.  Su,  J.  Krause,  S.  Satheesh,  S.  Ma,\nZ. Huang,  A. Karpathy,  A. Khosla,  M. Bernstein,  et al.   Imagenet\nlarge scale visual recognition challenge.arXiv:1409.0575, 2014.\n[37]  A. M. Saxe, J. L. McClelland, and S. Ganguli.   Exact solutions to\nthe nonlinear dynamics of learning in deep linear neural networks.\narXiv:1312.6120, 2013.\n[38]  N. N. Schraudolph. Accelerated gradient descent by factor-centering\ndecomposition. Technical report, 1998.\n[39]  N. N. Schraudolph.   Centering neural network gradient factors.   In\nNeural  Networks:   Tricks  of  the  Trade,  pages  207–226.  Springer,\n1998.\n[40]  P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y. Le-\nCun.   Overfeat:  Integrated  recognition,  localization  and  detection\nusing convolutional networks. InICLR, 2014.\n[41]  K. Simonyan and A. Zisserman.  Very deep convolutional networks\nfor large-scale image recognition. InICLR, 2015.\n[42]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Highway networks.\narXiv:1505.00387, 2015.\n[43]  R. K. Srivastava, K. Greff, and J. Schmidhuber.  Training very deep\nnetworks.1507.06228, 2015.\n[44]  C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Er-\nhan, V. Vanhoucke, and A. Rabinovich.  Going deeper with convolu-\ntions. InCVPR, 2015.\n[45]  R. Szeliski.  Fast surface interpolation using hierarchical basis func-\ntions.TPAMI, 1990.\n[46]  R. Szeliski.  Locally adapted hierarchical basis preconditioning.  In\nSIGGRAPH, 2006.\n[47]  T. Vatanen, T. Raiko, H. Valpola, and Y. LeCun.  Pushing stochas-\ntic gradient towards second-order methods–backpropagation learn-\ning  with  transformations  in  nonlinearities.   InNeural  Information\nProcessing, 2013.\n[48]  A. Vedaldi and B. Fulkerson.  
VLFeat: An open and portable library\nof computer vision algorithms, 2008.\n[49]  W. Venables and B. Ripley.   Modern applied statistics with s-plus.\n1999.\n[50]  M. D. Zeiler and R. Fergus. Visualizing and understanding convolu-\ntional neural networks. InECCV, 2014.\n9\n\nA. Object Detection Baselines\nIn this section we introduce our detection method based\non the baseline Faster R-CNN [32] system. The models are\ninitialized by the ImageNet classification models, and then\nfine-tuned  on  the  object  detection  data.   We  have  experi-\nmented with ResNet-50/101 at the time of the ILSVRC &\nCOCO 2015 detection competitions.\nUnlike VGG-16 used in [32], our ResNet has no hidden\nfc layers.   We adopt the idea of “Networks on Conv fea-\nture maps” (NoC) [33] to address this issue.  We compute\nthe  full-image  shared  conv  feature  maps  using  those  lay-\ners whose strides on the image are no greater than 16 pixels\n(i.e., conv1, conv2\nx, conv3x, and conv4x, totally 91 conv\nlayers in ResNet-101; Table 1). We consider these layers as\nanalogous to the 13 conv layers in VGG-16, and by doing\nso, both ResNet and VGG-16 have conv feature maps of the\nsame total stride (16 pixels).  These layers are shared by a\nregion proposal network (RPN, generating 300 proposals)\n[32] and a Fast R-CNN detection network [7].  RoI pool-\ning [7] is performed before conv5\n1.   On this RoI-pooled\nfeature, all layers of conv5x and up are adopted for each\nregion, playing the roles of VGG-16’s fc layers.  The final\nclassification layer is replaced by two sibling layers (classi-\nfication and box regression [7]).\nFor the usage of BN layers, after pre-training, we com-\npute the BN statistics (means and variances) for each layer\non the ImageNet training set. Then the BN layers are fixed\nduring  fine-tuning  for  object  detection.   As  such,  the  BN\nlayers become linear activations with constant offsets and\nscales, and BN statistics are not updated by fine-tuning. 
We\nfix the BN layers mainly for reducing memory consumption\nin Faster R-CNN training.\nPASCAL VOC\nFollowing [7, 32], for the PASCAL VOC 2007testset,\nwe use the 5ktrainvalimages in VOC 2007 and 16ktrain-\nvalimages in VOC 2012 for training (“07+12”).   For the\nPASCAL VOC 2012testset, we use the 10ktrainval+test\nimages in VOC 2007 and 16ktrainvalimages in VOC 2012\nfor  training  (“07++12”).   The  hyper-parameters  for  train-\ning Faster R-CNN are the same as in [32].  Table 7 shows\nthe results.  ResNet-101 improves the mAP by>3% over\nVGG-16.  This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe  MS  COCO  dataset  [26]  involves  80  object  cate-\ngories.   We  evaluate  the  PASCAL  VOC  metric  (mAP  @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation.  Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images.  The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset.  ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network.  Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly  as  big  as  mAP@.5’s  (6.9%).   This  suggests  that  a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions.  
These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox.  We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6].  Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal  context.We  combine  global  context  in  the  Fast\nR-CNN step.  Given the full-image conv feature map,  we\npool a feature by global Spatial Pyramid Pooling [12] (with\na  “single-level”  pyramid)  which  can  be  implemented  as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature.  This global feature is con-\ncatenated with the original per-region feature, followed by\nthe  sibling  classification  and  box  regression  layers.   This\nnew  structure  is  trained  end-to-end.   Global  context  im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32],  where the image’s\nshorter side iss= 600pixels.  Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature  pyramid,  and  in  [33]  by  using  maxout  layers.   In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time.  In addition, we have per-\nformed  multi-scale  testing  only  for  the  Fast  R-CNN  step\n(but not yet for the RPN step).  
With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5  79.0  70.9  65.5  52.1  83.1  84.7  86.4  52.0  81.9  65.7  84.8  84.6  77.5  76.7  38.8  73.6  73.9  83.0  72.6\nbaselineResNet-10107+1276.479.8  80.7  76.2  68.3  55.9  85.1  85.389.856.7  87.8  69.4  88.3  88.9  80.9  78.4  41.7  78.6  79.8  85.3  72.0\nbaseline+++ResNet-101COCO+07+1285.690.0  89.6  87.8  80.8  76.1  89.9  89.989.675.5  90.0  80.7  89.6  90.3  89.1  88.7  65.4  88.1  85.6  89.0  86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set.  The baseline is the Faster R-CNN system.  The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse   mbike  person   plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9  79.8  74.3  53.9  49.8  77.5  75.9  88.5  45.6  77.1  55.3  86.9  81.7  80.9  79.6  40.1  72.6  60.9  81.2  61.5\nbaselineResNet-10107++1273.886.5  81.6  77.2  58.0  51.0  78.6  76.6  93.2  48.6  80.4  59.0  92.1  85.3  84.8  80.7  48.1  77.3  66.5  84.7  65.6\nbaseline+++ResNet-101COCO+07++1283.892.1  88.4  84.8  75.9  71.4  86.3  87.8  94.2  66.8  89.4  69.2  93.9  91.9  90.9   89.6  67.9  88.2  76.8  90.3  80.0\nTable  11.  Detection  results  on  the  PASCAL  VOC  2012  test  set  (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4).  
The baseline is the Faster R-CNN system.  The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33].  RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server.  Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks.  We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed  by  an  ensemble  of  per-region  classifiers.   Table  9\nshows our result based on an ensemble of 3 networks.  The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted.  By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n.  
The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories.   The  accuracy  is  evaluated  by  mAP@.5.   Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9.   The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data.  We split the validation set\ninto two parts (val1/val2) following [8].  We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation.  In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used.   In the “testing” column,  “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify  and  localize  the  objects.   
Following  [40,  41],  we\nassume that the image-level classifiers are first adopted for\npredicting  the  class  labels  of  an  image,  and  the  localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes.  We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class.  We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion.   We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications.  Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32].  Theclsandreglayers\nare both in aper-classfrom,  in contrast to [32].   Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning  an  object  class;  thereglayer  has  a  1000×4-d  output\nconsisting of box regressors for 1000 classes.  As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly  sample  224×224  crops  for  data  augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32].  For testing,\nthe network is applied on the image fully-convolutionally.\nTable  13  compares  the  localization  results.   Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction.  
VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes.  Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%.  This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32].  One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this,  in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class.   These\npredicted  boxes  play  a  role  of  class-dependent  proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier.  The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. 
The outputs of this network consist of two\nsibling fc layers forclsandreg,  also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis  method  reduces  the  top-5  localization  error  to\n10.6% (Table 13).   This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/1512.03385v1",
+      "updated": "2015-12-10T19:51:55Z",
+      "published": "2015-12-10T19:51:55Z",
+      "title": "Deep Residual Learning for Image Recognition",
+      "summary": "  Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n  The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n",
+      "author": [
+        {
+          "name": "Kaiming He"
+        },
+        {
+          "name": "Xiangyu Zhang"
+        },
+        {
+          "name": "Shaoqing Ren"
+        },
+        {
+          "name": "Jian Sun"
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Tech report",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/1512.03385v1",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/1512.03385v1",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.CV",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "arxiv_2002.09002": {
+    "path": [
+      "rusthorn.pdf"
+    ],
+    "idType": "arxiv",
+    "tags": [],
+    "comments": "",
+    "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1    Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). 
The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1  [cs.PL]  20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r)  :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers,  as  we  see  in§1.1.  We  propose  a  novel  method  to  tackle  this\nproblem for pointer-manipulating programs underRust-style  ownership, as we\nexplain in§1.2.\n1.1    Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. 
thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n)  :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor  example,  the  above  CHC  system  onMc91can  be  solved  instantly  by  many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt  can  immediately  returntrue;  or  it  recursively  calls  itself  and  checks  if  the\ntarget ofmaremains unchanged through the recursive call. 
In effect this function\ndoes  nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely  fresh;  it  is  assumed  to  be  different  from  the  argumentmaof  the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)  :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. 
Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely  speaking,  SeaHorn  tends  to  even  omit  shallow  address-freshness  checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2    Our Approach: Leverage Rust’s Ownership System\nThis  paper  proposes  a  novel  approach  to  CHC-based  verification  of  pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style  Ownership.Various  styles  ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell  and  at  each  point  of  program  execution,  either  (i)  only  one  alias  has  the\nupdate(write & read)  permission  to  the  cell,  with  any  other  alias  havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith  no  alias  having  the  update  permission  to  it.  
In  summary,when  an  alias\ncan  read  some  data(with an update/read permission),any  other  alias  cannot\nmodify the data.\nAs  a  running  example,  let  us  consider  the  program  below,  which  follows\nRust’s  ownership  discipline  (it  is  written  in  the  C  style;  the  Rust  version  is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose  the  ability  to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3    \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values  and  aliases  ofaandbin  evaluatinginc_max(5,3).  Each  line  shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. 
For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis  simply  expressed  asa\n◦\n!=b\n◦\n;  the  updated  values  ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. 
Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r)   :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams  to  CHC  systems  for  a  core  language  of  Rust,  prove  correctness  (both\nsoundness  and  completeness)  of  the  translation,  and  confirm  the  effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2    Core Language: Calculus of Ownership and Reference\nWe  formalize  a  core  of  Rust  asCalculus  of  Ownership  and  Reference  (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. 
It is a typed procedural language with a Rust-like ownership system.\n2.1    Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::=  (lifetime variable)X,Y::=  (type variable)\nx,y::=  (variable)f,g::=  (function name)L::=  (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::=  +|−|···op\nbool\n::=>=|==|!=|···\nProgram,  Function  and  Label.A program (denoted byΠ) is a set of function\ndefinitions.  A  function  definition  (F)  consists  of  a  function  name,  a  function\nsignature  and  a  set  of  labeled  statements  (L:S).  In  COR,  for  simplicity,  the\ninput/output types of a function are restricted topointer  types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. 
A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn  instruction  (I)  performs  an  elementary  operation:  mutable  (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater),  getting  a  constant / operation  result / random  integer  (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning  a  pair  (let∗y=  (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n)  =∗x).  An  instruction  of  form\nlet∗y=···implicitly  allocates  new  memory  cells  asy;  also,  some  instruc-\ntions  deallocate  memory  cells  implicitly.  For  simplicity,  every  variable  is  de-\nsigned to be apointerand everyrelease  of  a  variableshould be explicitly an-\nnotated  by  ‘dropx’.  In  addition,  we  provide  swap  instead  of  assignment;  the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Box<T>in Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). 
An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning  pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory.  In  contrast,  amutable/immutable  reference(orunique/shared  refer-\nence)  borrows  an  update/read  permission  from  an  owning  pointer  or  another\nreference  with  the  deadline  of  alifetimeα(introduced  later).  A  mutable  ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract  time  point  in  the  process  of  computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime  parameterthat a function takes or alocal  lifetime  variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces  a  new  local  lifetime  variable,nowαsets  a  local  lifetime  variable  to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. 
The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype  traits  and  closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan  element  of  a  pair).  Still,  we  can  eventually  modify/borrow  a  fragment  by\nborrowing the whole variable andsplitting  pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis  borrow-and-split  strategy,  nevertheless,  yields  a  subtle  obstacle  when  we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1  (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. 
However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1).  Ininc-max,  increment  on  the  mutable  referencemcis  performed  by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe  following  is  the  corresponding  Rust  program,  with  ghost  annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2    Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable  contextΓis  a  finite  set  of  items  of  formx:\na\nT,  whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’  or  ‘†α’  (frozenuntil  lifetimeα).  We  abbreviatex:\nactive\nTasx:T.  A\nvariable context should not contain two items on the same variable. 
Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole  context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The  set  operationA+B(or  more  generally\n∑\nλ\nA\nλ\n)  denotes  the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally,  an  auxiliary  definition  for  a  rule  can  be  presented  just  below,\npossibly in a dotted box.\nProgram  and  Function.The  rules  for  typing  programs  and  functions  are  pre-\nsented below. They assign to each label a whole context (Γ,A). 
‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA  R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps  fromentry,  but  that  order  is  not  very  obvious  because  ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. 
‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso  that,  for  anyI,Π,f,  (Γ,A),  there  exists  at  most  one  (Γ\n′\n,A\n′\n)  such  that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). 
Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα,  it  just  ensures  the  new  local  lifetime  variable  to  be  earlier  than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet  us  interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas  “the  programΠhas  the\ntype  (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”.  The  type  system  ensures  that  any  program\nhas  at  most  one  type  (which  may  be  a  bit  unclear  because  of  unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3    Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::=  end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::=  [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn  COR,  a  reference  that  lives  after  the  return  from  the  function  should  be  cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. 
Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete  operational  semantics  is  characterized  by  the  one-step  transition\nrelationC→\nΠ\nC\n′\nand  the  termination  relation  final\nΠ\n(C),  which  can  be  de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction  call  and  return  from  a  function;  the  complete  rules  and  an  example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). 
#T is defined for every complete type T, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T_0 + T_1) := 1 + max{#T_0, #T_1}\n#(T_0 × T_1) := #T_0 + #T_1\n#μX.T := #T[μX.T/X]    #int = #P T := 1    #unit = 0\n3    CHC Representation of COR Programs\nTo formalize the idea discussed in §1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1    Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 13\nSyntax. The syntax is defined as follows.\n(CHC) Φ ::= ∀x_0:σ_0, ..., x_{m−1}:σ_{m−1}. φ̌ ⇐= ψ_0 ∧ ··· ∧ ψ_{n−1}\n⊤ := the nullary conjunction of formulas\n(formula) φ, ψ ::= f(t_0, ..., t_{n−1})    (elementary formula) φ̌ ::= f(p_0, ..., p_{n−1})\n(term) t ::= x | 〈t〉 | 〈t_∗, t_◦〉 | inj_i t | (t_0, t_1) | ∗t | ◦t | t.i | const | t op t′\n(value) v, w ::= 〈v〉 | 〈v_∗, v_◦〉 | inj_i v | (v_0, v_1) | const\n(pattern) p, q ::= x | 〈p〉 | 〈p_∗, p_◦〉 | inj_i p | (p_0, p_1) | const\n(sort) σ, τ ::= X | μX.σ | C σ | σ_0 + σ_1 | σ_0 × σ_1 | int | unit\n(container kind) C ::= box | mut    const ::= same as COR    op ::= same as COR\nbool := unit + unit    true := inj_1 ()    false := inj_0 ()\nX ::= (sort variable)    x, y ::= (variable)    f ::= (predicate variable)\nWe introduce box σ and mut σ, which correspond to own T / immut_α T and\nmut_α T respectively. 〈t〉 / 〈t_∗, t_◦〉 is the constructor for box σ / mut σ. ∗t takes the\nbody/first value of 〈−〉 / 〈−,−〉 and ◦t takes the second value of 〈−,−〉. 
We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ  σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n)    for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ)    for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. 
Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x)    [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n)    [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n)    [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|=  ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2    Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. 
Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas  follows.\nˇ\nPis  a  meta-variable  for  a  non-mutable-reference  pointer  kind,  i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|)    (|\nˇ\nP T|) :=box(|T|)    (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|)    (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define  ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. 
The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2  (CHC  Representation).We  present  below  the  CHC  representation\noftake-maxdescribed  in§2.1.  We  omit  CHCs  oninc-maxhere.  We  have  also\n\n16Y. 
Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3    Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We  use{|···|}(instead  of{···})  for  the  intensional  description  of\na  multiset.A⊕B(or  more  generally\n⊕\nλ\nA\nλ\n)  denotes  the  multiset  sum  (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM)  is  defined  as  follows.\n21\nHere,  amemory  footprintMis  a  finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2]    for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) 
::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation.  Here, for simplicity,fis restricted  to one that does  not  take\nlifetime  parameters  (we  call  such  a  functionsimple;  the  input/output  types\nof  a  simple  function  cannot  contain  references).  
We  definef\nCOS\nΠ\nas  the  pred-\nicate  (on  values  of  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|)  iff’s  input/output  types  are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness  Theorem.Finally,  the  correctness  (both  soundness  and  com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1  (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract  variables, which is strongly related toprophecy  variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC  resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based  model, defined analogously\nto  the  COS-based  model,  isequivalentto  the  least  model  of  the  CHC  repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. 
Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4    Advanced Examples\nWe  give  advanced  examples  of  pointer-manipulating  Rust  programs  and  their\nCHC  representations.  For  readability,  we  write  programs  in  Rust  (with  ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily  deep  ancestor.  Interestingly,  each  recursive  call  tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. 
In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a, a_◦〉, 〈b, b_◦〉, r) ⇐= b_◦ = b ∧ r = 〈a, a_◦〉\nChoose(〈a, a_◦〉, 〈b, b_◦〉, r) ⇐= a_◦ = a ∧ r = 〈b, b_◦〉\nLingerDec(〈a, a_◦〉, r) ⇐= a′ = a − 1 ∧ a_◦ = a′ ∧ r = true\nLingerDec(〈a, a_◦〉, r) ⇐= a′ = a − 1 ∧ oldb = b ∧ Choose(〈a′, a_◦〉, 〈b, b_◦〉, mc)\n∧ LingerDec(mc, r′) ∧ r = (r′ && oldb >= b_◦)\nr = true ⇐= LingerDec(〈a, a_◦〉, r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a, a_◦〉, 〈b, b_◦〉, r) :⇐⇒ (b_◦ = b ∧ r = 〈a, a_◦〉) ∨ (a_◦ = a ∧ r = 〈b, b_◦〉)\nLingerDec(〈a, a_◦〉, r) :⇐⇒ r = true ∧ a ≥ a_◦.\nExample 4. Combined with recursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code: ^22\n22 In COR, List can be expressed as μX. int × own X + unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 19\nenum List { Cons(i32, Box<List>), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a recursive\ndata type. take_some takes a mutable reference to a list and returns\na mutable reference to some element of the list. sum calculates the sum of the\nelements of a list. inc_some increments some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by 1.\nSuppose we wish to verify that inc_some never returns false. 
Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA  crucial  technique  used  here  issubdivision  of  a  mutable  reference,  which  is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)  :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r)  :⇐⇒r=sum(xs)\nIncSome(xs,r)  :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5    Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional  programming  language,  which  allows  us  to  usevarious  verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly  guessthe value with non-determinism. 
At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom  guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat  returns,  with  the  main  return  value,  the  updated  version  of  the  closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan  also  be  modeled  by  combination  of  (i)  and  (ii).  Further  investigation  on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. 
It also successfully and instantly verified\na similar representation of choose/linger_dec at Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version) 21\nLibraries with Unsafe Code. Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used for static check of the borrow discipline, many libraries in Rust\n(e.g. RefCell) provide a mechanism for dynamic ownership check.\nWe believe that such libraries with unsafe code can be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. The good news\nis that Iris has recently incorporated prophecy variables [34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample, Vec [58] can be represented simply as a functional array; a mutable/immutable\nslice &mut[T]/&[T] can be represented as an array of mutable/immutable\nreferences. For another example, to deal with RefCell [56], we\npass around an array that maps a RefCell<T> address to data of type T equipped\nwith an ownership counter; RefCell itself is modeled simply as an address. ^25 ^26\nImportantly, at the very time we take a mutable reference 〈a, a_◦〉 from a ref-cell,\nthe data at the array should be updated into a_◦. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well with memory leaks [52] caused for\nexample by combination of RefCell and Rc [57], because they obfuscate the\nownership release of mutable references. We think that use of Rc etc. should\nrather be restricted for smooth verification. 
Further investigation is needed.\n4    Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1    Implementation of RustHorn\nWe implemented a prototype verification tool RustHorn (available at\nhttps://github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representation)\n[45,51] of a Rust program into CHCs quite straightforwardly. ^27 Thanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25 To borrow a mutable/immutable reference from RefCell, we check and update the\ncounter and take out the data from the array.\n26 In Rust, we can use RefCell to naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27 In order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22 Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-label\nconstruction of §3.2. Also, assertions in functions are taken into account\nunlike the formalization in §3.2.\n4.2    Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. 
The benchmarks and\nexperimental results are accessible at https://github.com/hopv/rust-horn.\nThe benchmarks in the groups simple and bmc were taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s ownership\ndiscipline, and (iii) are small enough to be amenable to manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groups inc-max, just-rec and linger-dec\nare based on the examples that have appeared in §1 and §3.4. The group\nswap-dec consists of programs that perform repeated involved updates via mutable\nreferences to mutable references. The groups lists and trees feature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program among them explained in §3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. 
We also made modified\nversions  for  some  of  SeaHorn’s  CHC  outputs,  adding  constraints  on  address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3    Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max,  the  address-taking  parts  were  already  re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer  w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm    timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout   timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec     base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool 
error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error  timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error  timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error  timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error  timeoutfalse alarm\nunsafetool error0.40.1\nTable  1.Benchmarks  and  experimental  results  on  RustHorn  and  SeaHorn,  with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting  ‘unsafe’  for  a  safe  program;  “tool  error”  is  a  tool  error  of  Spacer,  which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5    Related Work\nCHC-based  Verification  of  Pointer-Manipulating  Programs.SeaHorn  [23]  is  a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. 
Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in §1.1, as confirmed\nby the experiments in §4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java programs\n(possibly using object pointers) to CHCs. They represent store invariants\nusing special predicates pull and push. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of §4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalar in simple and the instance basic in inc-max.\nVerification for Rust. Whereas we have presented the first CHC-based (fully automated)\nverification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. 
Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver  [49].  Although  it  clears  out  pointers  to  get  simple  models  like  our  ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple  static  tracking  of  addresses  based  on  lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27]  and  type  traits  [17].  Astrauskas  et  al.  [3]  conduct  semi-automated  verifi-\ncation  (manually  providing  pre/post-conditions  and  loop  invariants)  on  many\nrealistic  examples.  Because  Viper  is  based  onfractional  ownership,  however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. 
In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification  using  Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased  on  ownership  is  also  studied  well  [7,50,35].  Some  verification  platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased  verification  are  based  on  fractional  or  counting  ownership.  Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy  Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences  in  Rust.  This  logic  can  probably  be  used  for  generalizing  our  idea  as\nsuggested in§3.5.\n6    Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents  a  mutable  reference  as  a  pair  of  values,  the  current  value  and  the\nfuture value at the time of release. We have formalized the method for a core\nlanguage  of  Rust  and  proved  its  correctness.  We  have  implemented  a  proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. 
We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This  work  was  supported  by  JSPS  KAKENHI  Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1.  Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2.  Alberti,  F.,  Bruttomesso,  R.,  Ghilardi,  S.,  Ranise,  S.,  Sharygina,  N.:  Lazy  ab-\nstraction  with  interpolants  for  arrays.  In:  Bjørner,  N.,  Voronkov,  A.  (eds.)\nLogic  for  Programming,  Artificial  Intelligence,  and  Reasoning  -  18th  Interna-\ntional  Conference,  LPAR-18,  M ́erida,  Venezuela,  March  11-15,  2012.  Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3.  Astrauskas,  V.,  M ̈uller,  P.,  Poli,  F.,  Summers,  A.J.:  Leveraging  Rust  types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4.  Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5.  Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6.  Bjørner,   N.,   Gurfinkel,   A.,   McMillan,   K.L.,   Rybalchenko,   A.:   Horn   clause\nsolvers  for  program  verification.  In:  Beklemishev,  L.D.,  Blass,  A.,  Dershowitz,\nN.,  Finkbeiner,  B.,  Schulte,  W.  (eds.)  Fields  of  Logic  and  Computation  II\n-  Essays  Dedicated  to  Yuri  Gurevich  on  the  Occasion  of  His  75th  Birthday.\nLecture  Notes  in  Computer  Science,  vol.  9300,  pp.  24–51.  Springer  (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. 
 Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin  separation  logic.  In:  Palsberg,  J.,  Abadi,  M.  (eds.)  Proceedings  of  the  32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8.  Boyapati,   C.,   Lee,   R.,   Rinard,   M.C.:   Ownership   types   for   safe   program-\nming:   Preventing   data   races   and   deadlocks.   In:   Ibrahim,   M.,   Matsuoka,\nS.   (eds.)   Proceedings   of   the   2002   ACM   SIGPLAN   Conference   on   Object-\nOriented  Programming  Systems,  Languages  and  Applications,  OOPSLA  2002,\nSeattle,  Washington,  USA,  November  4-8,  2002.  pp.  211–230.  ACM  (2002).\nhttps://doi.org/10.1145/582419.582440\n9.  Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10.  Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11.  Champion,  A.,  Chiba,  T.,  Kobayashi,  N.,  Sato,  R.:  ICE-based  refinement  type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. 
(eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12.  Champion,  A.,  Kobayashi,  N.,  Sato,  R.:  HoIce:  An  ICE-based  non-linear  Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium,  APLAS  2018,  Wellington,  New  Zealand,  December  2-6,  2018,  Pro-\nceedings.  Lecture  Notes  in  Computer  Science,  vol.  11275,  pp.  146–156.  Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13.  Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn:  Freeman-Benson,  B.N.,  Chambers,  C.  (eds.)  Proceedings  of  the  1998  ACM\nSIGPLAN  Conference  on  Object-Oriented  Programming  Systems,  Languages  &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14.  Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15.  Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16.  van   Emden,   M.H.,   Kowalski,   R.A.:   The   semantics   of   predicate   logic   as\na   programming   language.   Journal   of   the   ACM23(4),   733–742   (1976).\nhttps://doi.org/10.1145/321978.321991\n17.  
Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18.  Fedyukovich,  G.,  Kaufman,  S.J.,  Bod ́ık,  R.:  Sampling  invariants  from  frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19.  Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20.  Foster,  J.N.,  Greenwald,  M.B.,  Moore,  J.T.,  Pierce,  B.C.,  Schmitt,  A.:  Com-\nbinators  for  bidirectional  tree  transformations:  A  linguistic  approach  to  the\nview-update  problem.  ACM  Trans.  Program.  Lang.  Syst.29(3),17  (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21.  Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22.  Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware  verifiers  from  proof  rules.  In:  Vitek,  J.,  Lin,  H.,  Tip,  F.  (eds.)  ACM\nSIGPLAN  Conference  on  Programming  Language  Design  and  Implementation,\nPLDI  ’12,  Beijing,  China  -  June  11  -  16,  2012.  pp.  405–416.  ACM  (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23.  Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) 
Computer Aided Verification\n-  27th  International  Conference,  CAV  2015,  San  Francisco,  CA,  USA,  July  18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24.  Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25.  Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems.  In:  Zimmermann,  T.,  Cleland-Huang,  J.,  Su,  Z.  (eds.)  Proceedings  of\nthe  24th  ACM  SIGSOFT  International  Symposium  on  Foundations  of  Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26.  Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27.  Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28.  Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl  in  compositional  verification.  In:  Castagna,  G.,  Gordon,  A.D.  (eds.)  Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages,  POPL  2017,  Paris,  France,  January  18-20,  2017.  pp.  473–485.  ACM\n(2017). https://doi.org/10.1145/3009837\n29.  Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA.  (eds.)  2018  Formal  Methods  in  Computer  Aided  Design,  FMCAD  2018,\nAustin,  TX,  USA,  October  30  -  November  2,  2018.  pp.  1–7.  IEEE  (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. 
 Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31.  Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32.  Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33.  Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28,  e20 (2018). https://doi.org/10.1017/S0956796818000151\n34.  Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35.  Jung,  R.,  Swasey,  D.,  Sieczkowski,  F.,  Svendsen,  K.,  Turon,  A.,  Birkedal,  L.,\nDreyer,  D.:  Iris:  Monoids  and  invariants  as  an  orthogonal  basis  for  concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL  2015,  Mumbai,  India,  January  15-17,  2015.  pp.  637–650.  ACM  (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36.  Kahsai,  T.,  Kersten,  R.,  R ̈ummer,  P.,  Sch ̈af,  M.:  Quantified  heap  invariants  for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) 
LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37.  Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38.  Kalra,  S.,  Goel,  S.,  Dhawan,  M.,  Sharma,  S.:Zeus:  Analyzing  safety  of  smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39.  Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40.  Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41.  Lahiri,  S.K.,  Bryant,  R.E.:  Constructing  quantified  invariants  via  predicate  ab-\nstraction.  In:  Steffen,  B.,  Levi,  G.  (eds.)  Verification,  Model  Checking,  and  Ab-\nstract  Interpretation,  5th  International  Conference,  VMCAI  2004,  Venice,  Italy,\nJanuary 11-13, 2004, Proceedings. 
Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42.  Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n-  16th  International  Symposium,  ATVA  2018,  Los  Angeles,  CA,  USA,  October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43.  Lattner,  C.,  Adve,  V.S.:  Automatic  pool  allocation:  Improving  performance  by\ncontrolling  data  structure  layout  in  the  heap.  In:  Sarkar,  V.,  Hall,  M.W.  (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign  and  Implementation,  Chicago,  IL,  USA,  June  12-15,  2005.  pp.  129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44.  Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45.  Matsakis,  N.D.:  Introducing  MIR  (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46.  Matsakis,  N.D.,  Klock  II,  F.S.:  The  Rust  language.  In:  Feldman,  M.,  Taft,  S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47.  Matsushita,  Y.,  Tsukada,  T.,  Kobayashi,  N.:  RustHorn:  CHC-based  verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. 
Lecture Notes in Computer\nScience, Springer (2020)\n48.  Microsoft:   Boogie:   An   intermediate   verification   language   (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49.  de  Moura,  L.M.,  Kong,  S.,  Avigad,  J.,  van  Doorn,  F.,  von  Raumer,  J.:  The\nLean   theorem   prover   (system   description).   In:   Felty,   A.P.,   Middeldorp,   A.\n(eds.)  Automated  Deduction  -  CADE-25  -  25th  International  Conference  on\nAutomated  Deduction,  Berlin,  Germany,  August  1-7,  2015,  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   9195,   pp.   378–388.   Springer   (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50.  M ̈uller,  P.,  Schwerhoff,  M.,  Summers,  A.J.:  Viper:  A  verification  infrastructure\nfor  permission-based  reasoning.  In:  Jobstmann,  B.,  Leino,  K.R.M.  (eds.)  Verifi-\ncation,  Model  Checking,  and  Abstract  Interpretation  -  17th  International  Con-\nference,  VMCAI  2016,  St.  Petersburg,  FL,  USA,  January  17-19,  2016.  Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51.  Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52.  Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage  (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53.  Rust  Community:  RFC  2025:  Nested  method  calls  (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54.  Rust  Community:  RFC  2094:  Non-lexical  lifetimes  (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55.  Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56.  Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57.  
Rust  Community:  std::rc::Rc  -  Rust  (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58.  Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59.  Rust  Community:  Two-phase  borrows  (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60.  Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61.  Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62.  Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63.  Suenaga,  K.,  Kobayashi,  N.:  Fractional  ownerships  for  safe  memory  dealloca-\ntion.  In:  Hu,  Z.  (ed.)  Programming  Languages  and  Systems,  7th  Asian  Sym-\nposium,  APLAS  2009,  Seoul,  Korea,  December  14-16,  2009.  Proceedings.  Lec-\nture   Notes   in   Computer   Science,   vol.   5904,   pp.   128–143.   Springer   (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64.  Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) 
Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming  Language  Design  and  Implementation,  Tucson,  AZ,  USA,  June  7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65.  Toman,  J.,  Pernsteiner,  S.,  Torlak,  E.:crust:  A  bounded  verifier  for  Rust.\nIn:  Cohen,  M.B.,  Grunske,  L.,  Whalen,  M.  (eds.)  30th  IEEE/ACM  Interna-\ntional  Conference  on  Automated  Software  Engineering,  ASE  2015,  Lincoln,\nNE,  USA,  November  9-13,  2015.  pp.  75–80.  IEEE  Computer  Society  (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66.  Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67.  Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68.  Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity  of  Cambridge,  UK  (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69.  Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen  AccessThis  chapter  is  licensed  under  the  terms  of  the  Creative  Commons\nAttribution  4.0  International  License  (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe  source,  provide  a  link  to  the  Creative  Commons  license  and  indicate  if  changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. 
Matsushita et al.\nA    Complementary Definitions on COR\nA.1    Complete Typing Rules for Instructions\nThe  following  is  the  complete  rules  for  the  typing  judgment  on  instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P  R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy  unit:copy  immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type 
ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses  nested  releases  of  memory  cells.  For  translation  to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. 
Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ  Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2    Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) 
=a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, 
m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5  (Execution on Concrete Operational Semantics).The following is an\nexample  execution  for  the  COR  program  of  Example  1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. 
Matsushita et al.\nB    Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐=  ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐=  
ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐=  ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif  Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=  ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP 
T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC    Proof of the Correctness of the CHC Representation\nC.1    Abstract Operational Semantics\nWe  introduceabstract  operation  semanticsfor  COR,  as  a  mediator  between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe  get  rid  of  heaps  and  directly  represent  each  variable  as  a  value  with  such\nfuture  values  expressed  asabstract  variablesx(marked  bold  and  light  blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded  as  anabstract  configurationC,  which  is  defined  as  follows.  Here,  an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::=  end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::=  [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion  toC,  which  does  not  directly  affect  the  execution.Ais  aglobal  lifetime\n\n38Y. 
Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust  as  concrete  operational  semantics,  abstract  operational  semantics  is\ncharacterized  by  the  one-step  transition  relationC →\nΠ\nC\n′\nand  the  termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with  ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S 
|\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k<n}\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ+{(α,α\n(n)\n)}\nF;S |\n({α\n(n)\n}+A,{α\n(n)\n}×({α\n(n)\n}+A\nex\n)+R)\nS\nΠ,f,L\n=nowα;gotoL\n′\n[f,L]\n{(α,α\n(n)\n)}+Θ\nF;S |\n({α\n(n)\n}+A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S 
|\n(A,{(β\n(k)\n,γ\n(l)\n)∈R|β\n(k)\n6=α\n(n)\n})\nS\nΠ,f,L\n=α≤β;gotoL\n′\n[f,L]\nΘ\nF;S |\n(A,R)\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\n(A,({(Θ(α),Θ(β))}+R)\n+\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈const〉)};S |\nA\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x)) [[op]] val(F(x\n′\n))〉)};S |\nA\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈inj\ni\nˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n〉)};S |\nA\n→\nΠ\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n〉)};S |\nA\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)x\n◦!\nis fresh\n[f,L]\nΘ\nF+{(x,〈inj\ni\nˆv\n∗!\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\ni\n]\nΘ\nF+{(y\ni\n,〈ˆv\n∗!\n,x\n◦!\n〉)};S |\nA\n)[\ninj\ni\nx\n◦!\n/x\n◦\n]\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n[f,L]\nΘ\nF+{(x\n0\n,ˆv\n∗0\n),(x\n1\n,ˆv\n∗1\n)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n)〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n〉),(y\n1\n,〈ˆv\n∗1\n〉)};S |\nA\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nx\n◦0\n,x\n◦1\nare fresh\n[f,L]\nΘ\nF+{(x,〈(ˆv\n∗0\n,ˆv\n∗1\n),x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y\n0\n,〈ˆv\n∗0\n,x\n◦0\n〉),(y\n1\n,〈ˆv\n∗1\n,x\n◦1\n〉)};S |\nA\n)[\n(x\n◦0\n,x\n◦1\n)/x\n◦\n]\n\n40Y. Matsushita et al.\nExample 6  (Execution on Abstract Operaitonal Semantics).The following is an\nexample  execution  on  abstract  operational  semantics  for  Example  1.  
It  corre-\nsponds to Example 5, the example execution on concrete operational semantics.\nHere,A:= ({α},Id\n{α}\n) andΘ:={α,α\n(0)\n}.\n[inc-max,entry]\n∅\n{(oa,〈4〉),(ob,〈3〉)}|\n(∅,∅)\n→[inc-max,entry]\nΘ\n{(oa,〈4〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L3]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,entry]\nΘ\n{(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L1]\nΘ\n{(ord,〈inj\n1\n()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→[take-max,L2]\nΘ\n{(ou,〈()〉),(ma,〈4,a\n◦\n〉),(mb,〈3,b\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈b\n◦\n〉)}|\nA\n→\n+\n[take-max,L4]\nΘ\n{(ma,〈4,a\n◦\n〉)};\n[inc-max,L4]\nΘ\nmc,{(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L4]\nΘ\n{(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L5]\nΘ\n{(o1,〈1〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L7]\nΘ\n{(oc\n′\n,〈5〉),(mc,〈4,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→[inc-max,L8]\nΘ\n{(oc\n′\n,〈4〉),(mc,〈5,a\n◦\n〉),(oa,〈a\n◦\n〉),(ob,〈3〉)}|\nA\n→\n+\n[inc-max,L10]\nΘ\n{(oa,〈5〉),(ob,〈3〉)}|\nA\n→[inc-max,L11]\n∅\n{(oa,〈5〉),(ob,〈3〉)}|\n(∅,∅)\n→\n+\n[inc-max,L14]\n∅\n{(or,〈inj\n1\n()〉)}|\n(∅,∅)\nThe abstract variablesa\n◦\nandb\n◦\nare introduced for mutable borrow ofoaand\nob. By the call oftake-max,mbis released, whereby the variableb\n◦\nis set to the\nvalue 3, and the variablea\n◦\nis passed tomc. After the increment is performed,\nmcis released, and therebya\n◦\nis set to the updated value 5.\nC.2    Safety on Abstract Configurations\nIt is natural to require for an abstract configuration that each variable is shared\nby the borrower and the lender and is not used elsewhere.\n32\nA stack of borrows\n(caused  by  reborrows)  can  be  described  as  a  chain  of  abstract  variables  (e.g.\n〈v,x〉,〈x,y〉,〈y〉).\nTo describe such restrictions, we define thesafetyon an abstract configura-\ntion ‘safe\nΠ\n(C)’. 
We also showprogression  and  preservationregarding safety on\nabstract operational semantics, as a part of soundness of COR’s type system.\nSummary.Anabstract variable summaryXis a finite multiset of items of form\n‘give\nα\n(x::T)’ or ‘take\n†α\n(x::T)’.\n32\nWe should take care of the cases where a mutable reference is immutably borrowed\n(e.g.immut\nα\nmut\nβ\nT),  because  immutable  references  can  be  unrestrictedly  copied.\nLater when we define ‘summary‘ judgments, we get over this problem usingaccess\nmodes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)41\nNow, ‘summary\na\nD\n(ˆv::T| X)’ (the pre-value  ˆvof typeTyields an abstract\nvariable summaryX, under the access modeDand the activenessa) is defined\nas follows. Here, anaccess modeDis either of form ‘hot’ or ‘cold’.\nsummary\n†α\nD\n(x::T|{take\n†α\n(x::T)})\nsummary\na\nD·\nˇ\nP\n(ˆv::T|X)\nsummary\na\nD\n(〈ˆv〉::\nˇ\nP T|X)\nD·own:=D  D·immut\nβ\n:= cold\nsummary\na\nhot\n(ˆv::T|X)\nsummary\na\nhot\n(〈ˆv,x〉::mut\nβ\nT|X ⊕{give\nβ\n(x::T)})\nsummary\na\ncold\n(ˆv::T|X)\nsummary\na\ncold\n(〈ˆv,x〉::mut\nβ\nT|X)\nsummary\na\nD\n(ˆv::T[μX.T/X]|X)\nsummary\na\nD\n(ˆv::μX.T/X|X)\nsummary\na\nD\n(const::T|∅)\nsummary\na\nD\n(ˆv::T\ni\n|X)\nsummary\na\nD\n(\ninj\ni\nˆv::T\n0\n+T\n1\n∣\n∣\nX\n)\nsummary\na\nD\n(ˆv\n0\n::T\n0\n|X\n0\n)    summary\na\nD\n(ˆv\n1\n::T\n1\n|X\n1\n)\nsummary\na\nD\n(\n(ˆv\n0\n,ˆv\n1\n) ::T\n0\n×T\n1\n∣\n∣\nX\n0\n⊕X\n1\n)\n‘summary\nΘ\n(F::Γ|X)’ (the abstract stack frameFrespecting the variable\ncontextΓyieldsX, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,summary\na\nhot\n(\nF(x) ::TΘ| X\nx\n)\nsummary\nΘ\n(\nF::Γ\n∣\n∣\n⊕\nx:\na\nT∈Γ\nX\nx\n)\nFinally, ‘summary\nΠ\n(C |X)’ (the abstract configurationCyieldsXunder the\nprogramΠ) is defined as follows.\nfor anyi∈[n+ 1],summary\nΘ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|X\ni\n)\nsummary\nΠ\n(\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; 
[f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n∣\n∣\n⊕\nn\ni=0\nX\ni\n)\nLifetime  Safety.‘lifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)’  (the  global  lifetime\ncontextA\nglobal\nwith the lifetime parameter contextΘis safe on lifetimes with\nrespect to the (local) lifetime contextA\nlocal\nfrom the type system and the set of\nlifetime parametersA\nex\nunder the stack frame indexi) is defined as follows.\ndomΘ=|A\nlocal\n|for anyα∈A\nex\n,lettingβ\n(k)\n=Θ(α), k < iholds\nfor anyα∈|A\nlocal\n|−A\nex\n,Θ(α) =α\n(i)\nfor any (α,β)∈|A\nlocal\n|\n2\n−A\n2\nex\n, α≤\nA\nlocal\nβ⇐⇒Θ(α)≤\nA\nglobal\nΘ(β)\nfor anyα,β∈A\n2\nex\n, α≤\nA\nlocal\nβ=⇒Θ(α)≤\nA\nglobal\nΘ(β)\nlifetimeSafe\ni\n(A\nglobal\n,Θ|A\nlocal\n,A\nex\n)\n‘lifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\n’  (A\nglobal\nwith  the  finite  sequence  of\nfunction names, labels and lifetime parameter contexts (f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\nis safe on\nlifetimes under the programΠ) is defined as follows.\nfor anyi∈[n+1],lifetimeSafe\ni\n(A\nglobal\n,Θ\ni\n|A\nΠ,f\ni\n,L\ni\n,A\nexΠ,f\ni\n)\ncard|A\nglobal\n|=\n∑\nn\ni=0\ncard (|A\nΠ,f\ni\n,L\ni\n|−A\nexΠ,f\ni\n)\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nA\nΠ,f,L\n: the lifetime context for the labelLoffinΠcardX: the cardinality ofX\nFinally, ‘lifetimeSafe\nΠ\n(C)’ (the abstract configurationCis safe on lifetimes\nunder the programΠ) is defined as follows.\nlifetimeSafe\nΠ\n(\nA\nglobal\n,(f\ni\n,L\ni\n,Θ\ni\n)\nn\ni=0\n)\nlifetimeSafe\nΠ\n(\n[f\nn\n,L\nn\n]\nΘ\nn\nF\nn\n; [f\nn−1\n,L\nn−1\n]\nΘ\nn−1\nx\nn−1\n,F\nn−1\n;···; [f\n0\n,L\n0\n]\nΘ\n0\nx\n0\n,F\n0\n|\nA\nglobal\n)\n\n42Y. Matsushita et al.\nSafety.We first define the safety on abstract variable summaries. ‘safe\nA\n(x,X)’\nis  defined  as  follows.  
Here,T∼\nA\nUmeansT≤\nA\nU∧U≤\nA\nT(thetype\nequivalence).\nX(x) ={|give\nα\n(x::T),take\n†β\n(x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,X)\nX(x) =∅\nsafe\nA\n(x,X)\nX(x): the multiset of the items of form ‘give\nγ\n(x::U)’/‘take\nγ\n(x::U)’ inX\n‘safe\nA\n(X)’ means that safe\nA\n(x,X) holds for anyx.\nFinally, ‘safe\nΠ\n(C)’ is defined as follows.\nsummary\nΠ\n(C |X)    lifetimeSafe\nΠ\n(C)C=···|\nA\nsafe\nA\n(X)\nsafe\nΠ\n(C)\nProperty 1  (Safety on an Abstract Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there existsC\n′\nsatisfyingC →\nΠ\nC\n′\n.\nProof.Clear. The important guarantee the safety on an abstract configuration\nprovides is that, in the pre-value assigned to eachactivevariable, abstract vari-\nables do not appear except in the form〈ˆv,x〉.ut\nLemma 1  (Safety on the Abstract Configuration is Preserved).For any\nΠandC,C\n′\nsuch thatsafe\nΠ\n(C)andC →\nΠ\nC\n′\nhold,safe\nΠ\n(C\n′\n)is satisfied.\nProof.Straightforward. Preservation of safety on the abstract variable summary\nis the point. Below we check some tricky cases.\nType Weakening.Type weakening (xasT) essentially only changes lifetimes on\ntypes. 
A lifetime on a type can become earlier if it isnotguarded by anymut\nα\n.\nThus only the following changes happen on the abstract variable summary: (i)\nfor an item of form ‘give\nα\n(x::T)’,αcan get earlier andTcan be weakened; and\n(ii) for an item of form ‘take\n†α\n(x::T)’,αdo not change andTcan be weakened.\nMutable (Re)borrow.When we performletmy=mutbor\nα\npx, the abstract vari-\nable summary just gets two new items ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†α\n(x\n◦\n::T)’, for\nsomex\n◦\nandT.\nRelease of a Mutable Reference.When we release a mutable referencemx, whose\npre-value is of form〈ˆv,x\n◦\n〉, only the following changes happen on the abstract\nvariable summary: (i) the items of form ‘give\nα\n(x\n◦\n::T)’ and ‘take\n†β\n(x\n◦\n::T\n′\n)’ are\nremoved; and (ii) since  ˆvmoves to another variable, the type of each abstract\nvariable in ˆvmay change into an equivalent type.\nOwnership Weakening.Similar to a release of a mutable reference.\nSwap.Swap  (swap(∗x,∗y))  actually  does  not  alter  the  abstract  variable  sum-\nmary.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)43\nCopying.When  data  of  typeTis  copied,T:copyholds,  which  ensures  that\neach mutable referencemut\nα\nUinTis guarded by some immutable reference.\nTherefore the abstract variable summary does not change.\nSubdivision  of  a  Mutable  Reference.A mutable reference is subdivided in the\nfollowing forms: pair destruction ‘let(∗mx\n0\n,∗mx\n1\n) =∗mx’, variant destruction\n‘match∗mx{inj\n0\n∗my→gotoL\n0\n,···}’, and dereference ‘letmx=∗mpx’. When a\nmutable referencemxwith a pre-value〈ˆv,x〉is subdivided, the two items of form\ngive\nα\n(x::T)  and  take\n†β\n(x::T\n′\n)  are  accordingly  ‘subdivided’  in  the  abstract\nvariable summary. 
With a close look, the safety turns out to be preserved.\nElimination  of  a  Local  Lifetime  Variable.Just  after  we  eliminate  a  local  life-\ntime variableα(‘nowα’), since there remains no lifetime variable earlier than\nαin the lifetime context, the abstract variable summary has no item of form\n‘give\nα\n(n)\n(x::T)’  (for  appropriaten).  Therefore,  just  before  (and  just  after)\nthe  lifetime  elimination,  the  abstract  variable  summary  has  no  item  of  form\n‘take\nα\n(n)\n(x::T\n′\n)’.ut\nC.3    SLDC Resolution\nFor CHC representation of a COR program, we introduce a variant of SLD resolu-\ntion, which we callSLDC resolution(Selective Linear Definite clause Calculative\nresolution). Interpreting each CHC as a deduction rule, SLDC resolution can be\nunderstood as atop-downconstruction of a proof tree from the left-hand side.\nSLDC resolution is designed to be complete with respect to the logic (Lemma 2).\nAresolutive  configurationKand apre-resolutive  configuration\nˆ\nKhave the\nfollowing form.\n(resolutive configuration)K::=  ˇφ\n0\n,...,ˇφ\nn−1\n|q\n(pre-resolutive configuration)\nˆ\nK::=φ\n0\n,...,φ\nn−1\n|q\nHere,θmaps variables to patterns. For a termt,tθstands fortwith eachxre-\nplaced withθ(x). For a formulaφ=f(t\n0\n,...,t\nn−1\n),φθstands forf(t\n0\nθ,...,t\nn−1\nθ).\nThe elementary formulas in a resolutive configuration can be understood as a\nmodel of acall stack.qis a pattern that represents thereturned value. This idea\nis later formalized in Appendix C.4.\nK →\n(Φ,Ξ)\nK\n′\n(Kcan  change  intoK\n′\nby  one  step  of  SLDC  resolution  on\n(Φ,Ξ)) is defined by the following non-deterministic transformation fromKto\nK\n′\n.\n1.  The ‘stack’ part ofKshould be non-empty. LetK=f(p\n0\n,...,p\nm−1\n),ˇφ\n1\n,...,\nˇφ\nn\n|q.\nTake fromΦany CHC that unifies with the head of the stack ofK. That is,\nΦis of form∀x\n0\n:σ\n0\n,...,x\nl−1\n:σ\nl−1\n. 
f(p\n′\n0\n,...,p\n′\nm−1\n)⇐=ψ\n0\n∧···∧ψ\nk−1\nand\np\n′\n0\n,...,p\n′\nm−1\nunify withp\n0\n,...,p\nm−1\n. Let us take the mostgeneralθsuch\nthatp\n0\n=p\n′\n0\nθ,...,p\nm−1\n=p\n′\nm−1\nθhold.\nNow we have a pre-resolutive configuration\nˆ\nK=ψ\n′\n0\n,...,ψ\n′\nk−1\n,ˇφ\n′\n1\n,...,ˇφ\n′\nn\n|q\n′\n,\nwhereψ\n′\ni\n:=ψ\ni\nθ,  ˇφ\n′\nj\n:=  ˇφ\nj\nθandq\n′\n:=qθ.\n\n44Y. Matsushita et al.\n2.  We ‘calculate’\nˆ\nKinto a resolutive configuration. That is, we repeat the fol-\nlowing  operations  to  update  (\nˆ\nKuntilψ\n′\n0\n,...,ψ\n′\nk−1\nall  become  elementary.\nK\n′\nis set to the final version of\nˆ\nK.\n–We substitute variables conservatively until there do not remain terms\nof  form∗x,◦x, x.i, xopt/topx;  for  each  case,  we  replacexwith\n〈x\n∗\n〉/〈x\n∗\n,x\n◦\n〉(depending on the sort),〈x\n∗\n,x\n◦\n〉, (x\n0\n,x\n1\n),n, taking fresh\nvariables.\n–We replace each∗〈t\n∗\n〉/∗〈t\n∗\n,t\n◦\n〉,◦〈t\n∗\n,t\n◦\n〉,(t\n0\n,t\n1\n).i, nopn\n′\nwitht\n∗\n, t\n◦\n, t\ni\n,\nn[[op]]n\n′\n.\n–If there exists a variablexthat occurs only once in the pre-resolutive\nconfiguration\nˆ\nK, then replace it with any value of the suitable sort.\n33\nWe have carefully designed SLDC resolution to match it with abstract opera-\ntional semantics, which assists the proof of Theorem 2.\nLemma 2  (Completeness of SLDC Resolution).For any(Φ,Ξ)andf∈\ndomΞ, the following are equivalent for any valuesv\n0\n,...,v\nn−1\n,wof the appro-\npriate sorts.\n1.M\nleast\n(Φ,Ξ)\n(f)(v\n0\n,...,v\nn−1\n,w)holds.\n2.  There  existK\n0\n,...,K\nN\nsatisfyingK\n0\n=f(v\n0\n,...,v\nn−1\n,r)|res,K\nN\n=|w\nandK\n0\n→\n(Φ,Ξ)\n···→\n(Φ,Ξ)\nK\nN\n.\nProof.Clear by thinking of derivation trees (which can be defined in a natural\nmanner) on CHC system (Φ,Ξ).ut\nC.4    Equivalence  of  the  AOS-based  Model  and  the  CHC\nRepresentation\nWe first show a bisimulation between abstract operational semantics and SLDC\nresolution Lemma 3. 
Using the bisimulation, we can easily show the equivalence\nof the AOS-based model and (the least model of) the CHC representation.\nBisimulation Lemma.Interestingly, there is abisimulationbetween the tran-\nsition system of abstract operational semantics and the process of SLDC resolu-\ntion.\nF \nθ\nf,L,r\nˇφ(the abstract stack frameFcan be translated into the elementary\nformula  ˇφ,  underθ,f,Landr)  is  defined  as  follows.  Here,θmaps  abstract\nvariables to (normal) variables. ˆvθis the value made from ˆvby replacing eachx\nwithθ(x).ris the abstract variable for taking the result.\nthe items ofFare enumerated as (x\n0\n,ˆv\n0\n),...,(x\nn−1\n,ˆv\nn−1\n)\nF \nθ\nf,L,r\nf\nL\n(ˆv\n0\nθ\n0\n,...,ˆv\nn−1\nθ,rθ)\n33\nWe  use  this  peculiar  rule  to  handle  the  ‘let∗y=rand()’  instruction  later  for\nLemma 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)45\nNow,C \nΠ\nKis defined as follows.\nsafe\nΠ\n(C)C= [f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\nr\n0\n,...,r\nn\nare fresh inC\nF\n0\n \nθ\nf\n0\n,L\n0\n,r\n0\nˇφ\n0\nfor anyi∈[n],F\ni+1\n+{(x\ni+1\n,r\ni\n)} \nθ\nf\ni+1\n,L\ni+1\n,r\ni+1\nˇφ\ni+1\nC \nΠ\nˇφ\n0\n,ˇφ\n1\n,...,ˇφ\nn−1\n|r\nn\nLemma 3  (Bisimulation between Abstract Operational Semantics and\nSLDC Resolution).Take anyΠ,CandKsatisfyingC \nΠ\nK.\nFor  anyC\n′\nsatisfyingC →\nΠ\nC\n′\n,  there  exists  someK\n′\nsatisfyingK →\n(|Π|)\nK\n′\nandC\n′\n \nΠ\nK\n′\n. Likewise, for anyK\n′\nsatisfyingK→\n(|Π|)\nK\n′\n, there exists someC\n′\nsatisfyingC →\nΠ\nC\n′\nandC\n′\n \nΠ\nK\n′\n.\nProof.Straightforward.ut\nAOS-based Model and the Equivalence Theorem.Take anyΠand simple\nf. 
TheAOS-based model(AOS stands for abstract operational semantics) forf,\ndenoted byf\nAOS\n, is the predicate defined by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)    safe\nΠ\n(C\n0\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L\n′\n]\n∅\n{(y,w)}|\n(∅,∅)\nf\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nNow we can prove the following theorem.\nTheorem 2  (Equivalence of the AOS-based Model and the CHC Rep-\nresentation).For anyΠand simplefinΠ,f\nAOS\nΠ\nis equivalent toM\n(|Π|)\n(f\nentry\n).\nProof.Clear from completeness of SLDC resolution (Lemma 2) and the bisimu-\nlation between abstract operational semantics and SLDC resolution (Lemma 3).\nut\nC.5    Bisimulation between Concrete and Abstract Operational\nSemantics\nExtending  ‘safe\nH\n(F::Γ| F)’  introduced  in§3.3,  we  define  thesafe  readout\n‘safe\nΠ\n(C| C)’  of  an  abstract  configuration  from  a  concrete  configuration.  In-\nterestingly,  the  safe  readout  is  abisimulationbetween  concrete  and  abstract\noperational semantics (Lemma 5). We also establishprogression  and  preserva-\ntionregarding the safe readout, as a part of soundness of COR’s type system\nin terms ofconcrete  operational  semantics, extending the soundness shown for\nabstract operational semantics in Appendix C.2.\nAuxiliary Notions.Anextended abstract variable summary\nˆ\nXis a finite mul-\ntiset  of  items  of  form  ‘give\nα\n(∗a;x::T)’  or  ‘take\n†α\n(∗a;x::T)’,  whereais  an\naddress. Anextended  access  mode\nˆ\nDis of form either ‘hot’ or ‘cold\nα\n’. Anex-\ntended  memory  footprint\nˆ\nMis  a  finite  multiset  of  items  of  form  ‘hot\na\n(a)’  or\n‘cold\nα\n(a)’, whereais an address.\n\n46Y. 
Matsushita et al.\nReadout.First, ‘readout\na\nH,\nˆ\nD\n(a::T|ˆv;\nˆ\nX,\nˆ\nM)’ and ‘readout\na\nH,\nˆ\nD\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)’\n(the pointer of the addressa/ the data ata, typedT, can be read out from\nthe heapHas a pre-value ˆv, yielding an abstract variable summary\nˆ\nXand an\nextended  memory  footprint\nˆ\nM,  under  the  extended  access  mode\nˆ\nDand  the\nactivenessa) are defined by the following rules.\nreadout\na\nH,\nˆ\nD◦\nˇ\nP\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(\na::\nˇ\nP T\n∣\n∣\n〈ˆv〉;\nˆ\nX,\nˆ\nM\n)\nˆ\nD◦own:=\nˆ\nDhot◦immut\nβ\n:= cold\nβ\ncold\nα\n◦immut\nβ\n:= cold\nα\nreadout\na\nH,hot\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,hot\n(\na::mut\nβ\nT\n∣\n∣\n〈ˆv,x〉;\nˆ\nX⊕{|give\nβ\n(∗a;x::T)|},\nˆ\nM\n)\nreadout\na\nH,cold\nβ\n(∗a::T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,cold\nβ\n(\na::mut\nβ\n′\nT\n∣\n∣\n〈ˆv,−〉;\nˆ\nX,\nˆ\nM\n)\nreadout\n†α\nH,\nˆ\nD\n(∗a::T|x;{|take\n†α\n(∗a;x::T)|},∅)\nH(a) =a\n′\nreadout\na\nH,\nˆ\nD\n(a\n′\n::P T|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::P T|ˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|})\nˆ\nD\na\n(a) :=\n{\nhot\na\n(a)(\nˆ\nD= hot)\ncold\nβ\n(a)    (\nˆ\nD= cold\nβ\n)\nreadout\na\nH,\nˆ\nD\n(∗a::T[μX.T/X]|ˆv;\nˆ\nX,\nˆ\nM)\nreadout\na\nH,\nˆ\nD\n(∗a::μX.T|ˆv;\nˆ\nX,\nˆ\nM)\nH(a) =n\nreadout\na\nH,\nˆ\nD\n(∗a::int|n;∅,{|\nˆ\nD\na\n(a)|})\nreadout\na\nH,\nˆ\nD\n(∗a::unit|();∅,∅)\nH(a) =i∈[2]    readout\na\nH,\nˆ\nD\n(∗(a+1) ::T\ni\n|ˆv;\nˆ\nX,\nˆ\nM)n\n0\n= (#T\n1−i\n−#T\ni\n)\n≥0\nfor anyk∈[n\n0\n],H(a+1+#T\ni\n+k) = 0\nˆ\nM\n0\n={|\nˆ\nD\na\n(a+1+#T\ni\n+k)|k∈[n\n0\n]|}\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n+T\n1\n∣\n∣\ninj\ni\nˆv;\nˆ\nX,\nˆ\nM⊕{|\nˆ\nD\na\n(a)|}⊕\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n∣\n∣\nˆv\n0\n;\nˆ\nX\n0\n,\nˆ\nM\n0\n)\nreadout\na\nH,\nˆ\nD\n(\n∗(a+ #T\n0\n) ::T\n1\n∣\n∣\nˆv\n1\n;\nˆ\nX\n1\n,\nˆ\nM\n1\n)\nreadout\na\nH,\nˆ\nD\n(\n∗a::T\n0\n×T\n1\n∣\n∣\n(ˆv\n0\n,ˆv\n1\n);\nˆ\nX\n0\n⊕\nˆ\nX\n1\n,\nˆ\nM\n0\n⊕\nˆ\nM\n1\n)\nNext, ‘readout\nH,Θ\n(F::Γ|F;\nˆ\nX,\nˆ\nM)’ 
(the stack frameFrespecting the vari-\nable contextΓcan be read out fromHas an abstract stack frameF, yielding\nˆ\nXand\nˆ\nM, under the lifetime parameter contextΘ) is defined as follows.\ndomF= domΓfor anyx:\na\nT∈Γ,readout\na\nH,hot\n(F(x) ::TΘ|t\nx\n;\nˆ\nX\nx\n,\nˆ\nM\nx\n)\nreadout\nH,Θ\n(\nF::Γ\n∣\n∣\n{(x,t\nx\n)|x∈domΓ};\n⊕\nx∈domΓ\nˆ\nX\nx\n,\n⊕\nx∈domΓ\nˆ\nM\nx\n)\nFinally, ‘readout\nΠ\n(C| C;\nˆ\nX,\nˆ\nM)’ (the data of the concrete configurationC\ncan be read out as the abstract configurationC, yielding\nˆ\nXand\nˆ\nM, under the\n\nRustHorn: CHC-based Verification for Rust Programs (full version)47\nprogramΠ) is defined as follows.\nfor anyi∈[n+1],readout\nH,Θ\ni\n(F\ni\n::Γ\nΠ,f\ni\n,L\ni\n|F\ni\n;\nˆ\nX\ni\n,\nˆ\nM\ni\n)\nreadout\nΠ\n(\n[f\n0\n,L\n0\n]F\n0\n; [f\n1\n,L\n1\n]x\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]x\nn\n,F\nn\n|H\n∣\n∣\n[f\n0\n,L\n0\n]\nΘ\n0\nF\n0\n; [f\n1\n,L\n1\n]\nΘ\n1\nx\n1\n,F\n1\n;···; [f\nn\n,L\nn\n]\nΘ\nn\nx\nn\n,F\nn\n|\nA\n;\n⊕\nn\ni=0\nˆ\nX\ni\n,\n⊕\nn\ni=0\nˆ\nM\ni\n)\nSafety.We define the safety on extended abstract variable summaries and ex-\ntended memory footprints.\n‘safe\nA\n(x,\nˆ\nX)’ is defined as follows.\nˆ\nX(x) ={|give\nα\n(∗a;x::T),take\nβ\n(∗a;x::T\n′\n)|}T∼\nA\nT\n′\nα≤\nA\nβ\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x) =∅\nsafe\nA\n(x,\nˆ\nX)\nˆ\nX(x): the multiset of items of form ‘give\nγ\n(∗b;x::U)’/‘take\nγ\n(∗b;x::U)’ in\nˆ\nX\n‘safe\nA\n(\nˆ\nX)’ means that safe\nA\n(x,\nˆ\nX) holds for anyx.\n‘safe\nA\n(a,\nˆ\nM)’ is defined as follows.\nˆ\nM(a) ={hot\na\n(a)}\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) =∅\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a) ={|hot\n†α\n(a),cold\nβ\n0\n(a),...,cold\nβ\nn−1\n(a)|}for anyi∈[n], β\ni\n≤\nA\nα\nsafe\nA\n(a,\nˆ\nM)\nˆ\nM(a): the multiset of items of form hot\na\n(a)/cold\nα\n(a) in\nˆ\nM\n‘safe\nA\n(\nˆ\nM)’ means that safe\nA\n(a,\nˆ\nM) holds for any addressa.\nSafe Readout.Finally, ‘safe\nΠ\n(C| C)’ (the data of the concrete configuration\nCcan besafelyread out as the abstract configurationCunderΠ) is defined 
as\nfollows.\nreadout\nΠ\n(C|C;\nˆ\nX,\nˆ\nM)    lifetimeSafe(C)C=···|\nA\nsafe\nA\n(\nˆ\nX)    safe\nA\n(\nˆ\nM)\nsafe\nΠ\n(C|C)\n‘safe\nΠ\n(C)’ means that safe\nΠ\n(C|C) holds for someC.\nProperty 2  (Safety on a Concrete Configuration Ensures Progression).For any\nΠandCsuch that safe\nΠ\n(C) holds and final\nΠ\n(C) does not hold, there exists\nsomeC\n′\nsatisfyingC→\nΠ\nC\n′\n.\nProof.Clear. One important guarantee the safety provides is that the data is\nstored in the heap in an expected form.ut\nLemma 4  (Safe Readout Ensures Safety on the Abstract Configura-\ntion).ForΠ,CandCsuch thatsafe\nΠ\n(C|C)holds,safe\nΠ\n(C)holds.\nProof.By  straightforward  induction  over  the  judgment  deduction.  Note  that\nsafety on aextendedabstract variable summary is in fact an extension of safety\non an abstract variable summary.ut\n\n48Y. Matsushita et al.\nBisimulation Lemma.The safe readout defined above is actually abisimula-\ntionbetween concrete and abstract operational semantics.\nLemma 5  (Bisimulation between Concrete and Abstract Operational\nSemantics).Take anyΠ,CandCsatisfyingsafe\nΠ\n(C|C).\nFor  anyC\n′\nsatisfyingC→\nΠ\nC\n′\n,  there  existsC\n′\nsatisfyingC →\nΠ\nC\n′\nand\nsafe\nΠ\n(C\n′\n| C\n′\n).  Likewise,  for  anyC\n′\nsatisfyingC →\nΠ\nC\n′\nholds,  there  existsC\n′\nsatisfyingC→\nΠ\nC\n′\nandsafe\nΠ\n(C\n′\n|C\n′\n).\nProof.How to takeC\n′\naccording toC\n′\nand vice versa can be decided in a straight-\nforward way that we do not explicitly describe here. The property safe\nΠ\n(C\n′\n|C\n′\n)\ncan be justified by the following observations.\nNo Unexpected Changes on Unrelated Data.The safety on the extended memory\nfootprint ensures that operations on hotly accessed data do not affect unrelated\ndata.  
Here,  the  following  property  plays  a  role:  when  readout\nH,hot\n(a::P T|\nˆv;\nˆ\nX,\nˆ\nM) holds andPis of formownormut\nα\n,{|hot(a+k)|k∈[#T]|} ⊆\nˆ\nM\nholds.\nPreservation of the Safety on the Extended Abstract Variable Summary.It can\nbe shown in a similar way to the proof of Lemma 1.\nPreservation of Safety on the Extended Memory Footprint.It can be shown by\nstraightforward case analysis.\nOne  important  point  is  that,  on  lifetime  elimination  (nowα),  a  frozen  hot\naccess (hot\n†α\n(a)) can be safely made active (hot\nfl\n(a)), because there are no cold\naccesses ona, which is guaranteed by the type system.\nAnother  point  is  that  swap  (swap(∗x,∗y))  does  not  change  the  extended\nmemory footprint.ut\nProperty 3  (Safety on the Concrete Configuration is Preserved).For anyΠand\nC,C\n′\nsuch that safe\nΠ\n(C) andC→\nΠ\nC\n′\nhold, safe\nΠ\n(C\n′\n) is satisfied.\nProof.It immediately follows by Lemma 5.ut\nC.6    Equivalence of the COS-based and AOS-based Models\nAfter  introducing  some  easy  lemmas,  we  prove  the  equivalence  of  the  COS-\nbased and AOS-based models (Theorem 3), relying on the bisimulation lemma\nLemma 5 proved above. Finally, we achieve the complete proof of Theorem 1.\nLemma 6.Take  anyΠ,  simplefandL.  For  anyF,HandF,  the  following\nequivalence holds.\nsafe\nH\n(F::Γ\nΠ,f,L\n|F)⇐⇒safe\nΠ\n(\n[f,L]F|H\n∣\n∣\n[f,L]\n∅\nF |\n(∅,∅)\n)\n(Thesafe\nH\njudgment is defined in§3.3.)\nProof.By straightforward induction.ut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)49\nLemma 7.For  anyΠandCof  form[f,L]F|H,  whenfis  simple,  there  is\nat most oneCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward induction. 
The simpleness offhas made the situation\neasy, because abstract variables do not occur inC.ut\nLemma 8.For anyΠandCof form[f,L]F |, whenfis simple andCis safe,\nthere existsCsatisfyingsafe\nΠ\n(C|C).\nProof.By straightforward construction.ut\nTheorem 3  (Equivalence of the COS-based Model and the AOS-based\nModel).For anyΠand simplef,f\nCOS\nΠ\nis equivalent tof\nCOS\nΠ\n.\nProof.Let us show that\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)⇐⇒f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nholds  for  any  valuesv\n0\n,...,v\nn−1\n,wof  the  sorts  (|T\n0\n|),...,(|T\nn−1\n|),(|U|),  where\nΣ\nΠ,f\n= (x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U.\n(=⇒).By assumption, we can take concrete configurationsC\n0\n,...,C\nN\nsatisfy-\ning the following (for someL,y,F,H,F\n′\nandH\n′\n).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nBy Lemma 6, taking abstract configurations\nC\n0\n:= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\n′\nN\n:= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\n,\nwe have safe\nΠ\n(C\n0\n|C\n0\n) and safe\nΠ\n(C\nN\n|C\n′\nN\n). By Lemma 4, safe\nΠ\n(C\n0\n) also holds.\nBy Lemma 5, we can takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n···→\nΠ\nC\nN\n, final\nΠ\n(C\nN\n),\nand safe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nSince  safe\nΠ\n(C\nN\n| C\nN\n)  and  safe\nΠ\n(C\nN\n| C\n′\nN\n)  hold,  by  Lemma  7  we  have\nC\nN\n=C\n′\nN\n. Therefore,f\nAOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.\n(⇐=).By assumption, we can take abstract configurationsC\n0\n,...,C\nN\nsatisfying\nthe following (for someLandy).\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)\nC\n0\n= [f,entry]\n∅\n{(x\ni\n,v\ni\n)|i∈[n]}|\n(∅,∅)\nC\nN\n= [f,L]\n∅\n{(y,w)}|\n(∅,∅)\nBy Lemma 8, there existsC\n0\nsuch that safe\nΠ\n(C\n0\n|C\n0\n) holds. 
By Lemma 5,\nwe  can  takeC\n1\n,...,C\nN\nsatisfyingC\n0\n→\nΠ\n··· →\nΠ\nC\nN\n,  final\nΠ\n(C\nN\n),  and\nsafe\nΠ\n(C\nk+1\n|C\nk+1\n) (for anyk∈[N]).\nC\n0\nandC\nN\nhave form\nC\n0\n= [f,entry]F|H   C\nN\n= [f,L]F\n′\n|H\n′\n,\nand by Lemma 6 the following judgments hold.\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nTherefore,f\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w) holds.ut\nCombining the equivalences of Theorem 2 and Theorem 3, we finally achieve\nthe proof of Theorem 1.",
+    "dataFromArxiv": {
+      "id": "http://arxiv.org/abs/2002.09002v2",
+      "updated": "2020-06-11T06:31:16Z",
+      "published": "2020-02-20T20:28:08Z",
+      "title": "RustHorn: CHC-based Verification for Rust Programs (full version)",
+      "summary": "  Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n",
+      "author": [
+        {
+          "name": "Yusuke Matsushita"
+        },
+        {
+          "name": "Takeshi Tsukada"
+        },
+        {
+          "name": "Naoki Kobayashi"
+        }
+      ],
+      "arxiv:doi": {
+        "_": "10.1007/978-3-030-44914-8_18",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "link": [
+        {
+          "$": {
+            "title": "doi",
+            "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18",
+            "rel": "related"
+          }
+        },
+        {
+          "$": {
+            "href": "http://arxiv.org/abs/2002.09002v2",
+            "rel": "alternate",
+            "type": "text/html"
+          }
+        },
+        {
+          "$": {
+            "title": "pdf",
+            "href": "http://arxiv.org/pdf/2002.09002v2",
+            "rel": "related",
+            "type": "application/pdf"
+          }
+        }
+      ],
+      "arxiv:comment": {
+        "_": "Full version of the same-titled paper in ESOP2020",
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "arxiv:primary_category": {
+        "$": {
+          "xmlns:arxiv": "http://arxiv.org/schemas/atom",
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      },
+      "category": {
+        "$": {
+          "term": "cs.PL",
+          "scheme": "http://arxiv.org/schemas/atom"
+        }
+      }
+    }
+  },
+  "book_0262162091_ch01.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch01.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nhogehoge",
+    "userSpecifiedTitle": "Types and Programming Languages_ch01",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_ch02.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "ch02.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nfugafuga",
+    "userSpecifiedTitle": "Types and Programming Languages_ch02",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  },
+  "book_0262162091_title.pdf": {
+    "idType": "book",
+    "path": [
+      "dummyTapl",
+      "title.pdf"
+    ],
+    "tags": [],
+    "comments": "",
+    "text": "\n\nFile: Untitled Document 1Page 1 of 1\nISBN 0-262-16209-1",
+    "userSpecifiedTitle": "Types and Programming Languages_title",
+    "dataFromNodeIsbn": {
+      "title": "Types and Programming Languages",
+      "authors": [
+        "Benjamin C. Pierce"
+      ],
+      "publisher": "MIT Press",
+      "publishedDate": "2002-01-04",
+      "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.",
+      "industryIdentifiers": [
+        {
+          "type": "ISBN_13",
+          "identifier": "9780262162098"
+        },
+        {
+          "type": "ISBN_10",
+          "identifier": "0262162091"
+        }
+      ],
+      "readingModes": {
+        "text": false,
+        "image": true
+      },
+      "pageCount": 646,
+      "printType": "BOOK",
+      "categories": [
+        "Computers"
+      ],
+      "maturityRating": "NOT_MATURE",
+      "allowAnonLogging": false,
+      "contentVersion": "preview-1.0.0",
+      "panelizationSummary": {
+        "containsEpubBubbles": false,
+        "containsImageBubbles": false
+      },
+      "imageLinks": {
+        "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api",
+        "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api"
+      },
+      "language": "en",
+      "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api",
+      "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api",
+      "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ"
+    }
+  }
+}
\ No newline at end of file

From 8b163f9225b14696a2dd7187deaa67cc34c00537 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 22:49:19 +0900
Subject: [PATCH 3/6] Check in CI

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 32f8b4a..fb9054d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,6 +13,7 @@ RUN npm run check:prettier
 RUN npm run build
 RUN npm run test
 RUN npm run scan_test_pdfs
+RUN ./check_update_all_generated_DBs.sh
 RUN [ -f "edit_and_run.sh" ] && cat edit_and_run.sh && exit 1 || echo "Build DB succeeded"
 
 WORKDIR /jendeley/jendeley-frontend

From 8949e17a6a4a0363a2efd54ea0465fb9329c0cde Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 23:01:14 +0900
Subject: [PATCH 4/6] Fix CI

---
 Dockerfile | 2 +-
 run-CI.sh  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fb9054d..3d4a846 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,7 +13,7 @@ RUN npm run check:prettier
 RUN npm run build
 RUN npm run test
 RUN npm run scan_test_pdfs
-RUN ./check_update_all_generated_DBs.sh
+RUN ./scripts/check_update_all_generated_DBs.sh
 RUN [ -f "edit_and_run.sh" ] && cat edit_and_run.sh && exit 1 || echo "Build DB succeeded"
 
 WORKDIR /jendeley/jendeley-frontend
diff --git a/run-CI.sh b/run-CI.sh
index e35b487..75aa47b 100755
--- a/run-CI.sh
+++ b/run-CI.sh
@@ -1,5 +1,5 @@
 #! /bin/bash -eux
 
-docker build . -f ./Dockerfile
-docker build . -f ./Releasable.Dockerfile
-docker build . -f ./shellcheck.Dockerfile
+docker build . -f ./Dockerfile --network=host
+docker build . -f ./Releasable.Dockerfile --network=host
+docker build . -f ./shellcheck.Dockerfile --network=host

From 626dfd02c63101366eaee10db53284567b5aa7e9 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 23:20:45 +0900
Subject: [PATCH 5/6] Update DB version in update_db subcommand

---
 jendeley-backend/src/update_db.ts | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/jendeley-backend/src/update_db.ts b/jendeley-backend/src/update_db.ts
index d8cec62..25e0691 100644
--- a/jendeley-backend/src/update_db.ts
+++ b/jendeley-backend/src/update_db.ts
@@ -7,7 +7,12 @@ import fetch from "node-fetch";
 import { Request as NFRequest } from "node-fetch";
 import { Either, genLeft, genRight } from "./either";
 import { validateJsonDB } from "./validate_db";
-import { ENTRY_AUTHORS, ENTRY_TEXT, ENTRY_TITLE } from "./constants";
+import {
+  ENTRY_AUTHORS,
+  ENTRY_TEXT,
+  DB_META_KEY,
+  JENDELEY_VERSION,
+} from "./constants";
 
 async function getTextsFromPDF(
   pdfFullpath: string,
@@ -79,6 +84,7 @@ async function update_db(dbPathVer1: string[], dbPathVer2: string[]) {
       jsonDB[id][ENTRY_AUTHORS] = [];
     }
   }
+  jsonDB[DB_META_KEY]["version"] = JENDELEY_VERSION;
 
   if (fs.existsSync(concatDirs(dbPathVer2))) {
     logger.fatal(dbPathVer2 + " already exists.");

From d6675a76a152c7ba9c0625700fe6d43c8b52a441 Mon Sep 17 00:00:00 2001
From: Akira Kawata <akirakawata1@gmail.com>
Date: Tue, 6 Feb 2024 23:34:59 +0900
Subject: [PATCH 6/6] Add check file

---
 .../scripts/check_update_all_generated_DBs.sh   | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100755 jendeley-backend/scripts/check_update_all_generated_DBs.sh

diff --git a/jendeley-backend/scripts/check_update_all_generated_DBs.sh b/jendeley-backend/scripts/check_update_all_generated_DBs.sh
new file mode 100755
index 0000000..28daf87
--- /dev/null
+++ b/jendeley-backend/scripts/check_update_all_generated_DBs.sh
@@ -0,0 +1,17 @@
+#! /bin/bash
+# Re-run `update_db` on every checked-in generated DB; exit 1 if any run leaves no output file.
+set -eux
+
+rootdir=$(git rev-parse --show-toplevel)
+workdir=$(mktemp -d -t jendeley_check_update_all_generated_DBs_XXXXX)
+cd "${rootdir}/jendeley-backend"
+status=0
+# Read paths line by line on fd 3 so names with spaces survive and node keeps its own stdin.
+while IFS= read -r -u 3 db; do
+    updated="${workdir}/updated_$(basename "${db}")"
+    echo "Checking ${db}"
+    node --require source-map-support/register dist/index.js update_db --db1 "${db}" --db2 "${updated}"
+    [[ -f "${updated}" ]] || status=1
+done 3< <(find "${rootdir}/jendeley-backend/generated_DBs" -name "*.json" | sort)
+echo "Check ${workdir} for updated DBs"
+exit "${status}"